diff --git a/rconfigure.py b/rconfigure.py index c3b008752..da584af63 100644 --- a/rconfigure.py +++ b/rconfigure.py @@ -4,11 +4,8 @@ import subprocess import platform -extensions = ['parquet'] - -# check if there are any additional extensions being requested -if 'DUCKDB_R_EXTENSIONS' in os.environ: - extensions = extensions + os.environ['DUCKDB_R_EXTENSIONS'].split(",") +# We always load extensions externally +extensions = [] unity_build = 20 if 'DUCKDB_BUILD_UNITY' in os.environ: @@ -36,11 +33,7 @@ def open_utf8(fpath, flags): return open(fpath, flags, encoding="utf8") -extension_list = "" - -for ext in extensions: - extension_list += ' -DDUCKDB_EXTENSION_{}_LINKED'.format(ext.upper()) - extension_list += " -DDUCKDB_BUILD_LIBRARY" +extension_list = " -DDUCKDB_BUILD_LIBRARY" libraries = [] if platform.system() == 'Windows': @@ -105,6 +98,10 @@ def open_utf8(fpath, flags): include_list += extension_list include_list += debug_move_flag +# Autoloading is on by default for R builds +include_list += " -DDUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1" +include_list += " -DDUCKDB_EXTENSION_AUTOINSTALL_DEFAULT=1" + # add -Werror if enabled if 'TREAT_WARNINGS_AS_ERRORS' in os.environ: include_list += ' -Werror' diff --git a/src/Makevars b/src/Makevars index 9448cbdfb..d1feffa6c 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,5 @@ include include/sources.mk CXX_STD = CXX17 -PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/snappy -Iduckdb/third_party/thrift -Iduckdb/third_party/zstd/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY +PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -I../inst/include -Iduckdb -DDUCKDB_BUILD_LIBRARY -DDUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1 -DDUCKDB_EXTENSION_AUTOINSTALL_DEFAULT=1 OBJECTS=database.o connection.o statement.o register.o relational.o scan.o transform.o utils.o reltoaltrep.o types.o cpp11.o $(SOURCES) diff --git a/src/Makevars.win b/src/Makevars.win index c9b0605ab..eb38dc3a9 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,5 +1,5 @@ include include/sources.mk CXX_STD = CXX17 -PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -Iduckdb/extension/parquet/include -Iduckdb/third_party/parquet -Iduckdb/third_party/snappy -Iduckdb/third_party/thrift -Iduckdb/third_party/zstd/include -I../inst/include -Iduckdb -DDUCKDB_EXTENSION_PARQUET_LINKED -DDUCKDB_BUILD_LIBRARY -DDUCKDB_PLATFORM_RTOOLS=1 +PKG_CPPFLAGS = -Iinclude -I../inst/include -DDUCKDB_DISABLE_PRINT -DDUCKDB_R_BUILD -Iduckdb/src/include -Iduckdb/third_party/fmt/include -Iduckdb/third_party/fsst -Iduckdb/third_party/re2 -Iduckdb/third_party/miniz -Iduckdb/third_party/utf8proc/include -Iduckdb/third_party/utf8proc -Iduckdb/third_party/hyperloglog -Iduckdb/third_party/fastpforlib -Iduckdb/third_party/tdigest -Iduckdb/third_party/libpg_query/include -Iduckdb/third_party/libpg_query -Iduckdb/third_party/concurrentqueue -Iduckdb/third_party/pcg -Iduckdb/third_party/httplib -Iduckdb/third_party/fast_float -Iduckdb/third_party/mbedtls -Iduckdb/third_party/mbedtls/include -Iduckdb/third_party/mbedtls/library -Iduckdb/third_party/jaro_winkler -Iduckdb/third_party/jaro_winkler/details -I../inst/include -Iduckdb -DDUCKDB_BUILD_LIBRARY -DDUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1 -DDUCKDB_EXTENSION_AUTOINSTALL_DEFAULT=1 -DDUCKDB_PLATFORM_RTOOLS=1 OBJECTS=database.o connection.o statement.o register.o relational.o scan.o transform.o utils.o reltoaltrep.o types.o cpp11.o $(SOURCES) PKG_LIBS=-lws2_32 diff --git a/src/duckdb/extension/parquet/column_reader.cpp b/src/duckdb/extension/parquet/column_reader.cpp deleted file mode 100644 index 0a0510a8d..000000000 --- a/src/duckdb/extension/parquet/column_reader.cpp +++ /dev/null @@ -1,1457 +0,0 @@ -#include "column_reader.hpp" - -#include "boolean_column_reader.hpp" -#include "callback_column_reader.hpp" -#include "cast_column_reader.hpp" -#include "duckdb.hpp" -#include "list_column_reader.hpp" -#include "miniz_wrapper.hpp" -#include "parquet_decimal_utils.hpp" -#include "parquet_reader.hpp" -#include "parquet_timestamp.hpp" -#include "row_number_column_reader.hpp" -#include "snappy.h" -#include "string_column_reader.hpp" -#include "struct_column_reader.hpp" -#include "templated_column_reader.hpp" -#include "utf8proc_wrapper.hpp" -#include "zstd.h" - -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/bit.hpp" -#include "duckdb/common/types/blob.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#endif - -namespace duckdb { - -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::PageType; -using duckdb_parquet::format::Type; - -const uint64_t ParquetDecodeUtils::BITPACK_MASKS[] = {0, - 1, - 3, - 7, - 15, - 31, - 63, - 127, - 255, - 511, - 1023, - 2047, - 4095, - 8191, - 16383, - 32767, - 65535, - 131071, - 262143, - 524287, - 1048575, - 2097151, - 4194303, - 8388607, - 16777215, - 33554431, - 67108863, - 134217727, - 268435455, - 536870911, - 1073741823, - 2147483647, - 4294967295, - 8589934591, - 17179869183, - 34359738367, - 68719476735, - 137438953471, - 274877906943, - 549755813887, - 1099511627775, - 2199023255551, - 4398046511103, - 8796093022207, - 17592186044415, - 35184372088831, - 70368744177663, - 140737488355327, - 281474976710655, - 562949953421311, - 1125899906842623, - 2251799813685247, - 4503599627370495, - 9007199254740991, - 18014398509481983, - 36028797018963967, - 72057594037927935, - 144115188075855871, - 288230376151711743, - 576460752303423487, - 1152921504606846975, - 2305843009213693951, - 4611686018427387903, - 9223372036854775807, - 18446744073709551615ULL}; - -const uint64_t ParquetDecodeUtils::BITPACK_MASKS_SIZE = sizeof(ParquetDecodeUtils::BITPACK_MASKS) / sizeof(uint64_t); - -const uint8_t ParquetDecodeUtils::BITPACK_DLEN = 8; - -ColumnReader::ColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : schema(schema_p), file_idx(file_idx_p), max_define(max_define_p), max_repeat(max_repeat_p), reader(reader), - type(std::move(type_p)), page_rows_available(0) { - - // dummies for Skip() - dummy_define.resize(reader.allocator, STANDARD_VECTOR_SIZE); - dummy_repeat.resize(reader.allocator, STANDARD_VECTOR_SIZE); -} - -ColumnReader::~ColumnReader() { -} - -Allocator &ColumnReader::GetAllocator() { - return reader.allocator; -} - -ParquetReader &ColumnReader::Reader() { - return reader; -} - -const LogicalType &ColumnReader::Type() const { - return type; -} - -const SchemaElement &ColumnReader::Schema() const { - return schema; -} - -idx_t ColumnReader::FileIdx() const { - return file_idx; -} - -idx_t ColumnReader::MaxDefine() const { - return max_define; -} - -idx_t ColumnReader::MaxRepeat() const { - return max_repeat; -} - -void ColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) { - if (chunk) { - uint64_t size = chunk->meta_data.total_compressed_size; - transport.RegisterPrefetch(FileOffset(), size, allow_merge); - } -} - -uint64_t ColumnReader::TotalCompressedSize() { - if (!chunk) { - return 0; - } - - return chunk->meta_data.total_compressed_size; -} - -// Note: It's not trivial to determine where all Column data is stored. Chunk->file_offset -// apparently is not the first page of the data. Therefore we determine the address of the first page by taking the -// minimum of all page offsets. -idx_t ColumnReader::FileOffset() const { - if (!chunk) { - throw std::runtime_error("FileOffset called on ColumnReader with no chunk"); - } - auto min_offset = NumericLimits::Maximum(); - if (chunk->meta_data.__isset.dictionary_page_offset) { - min_offset = MinValue(min_offset, chunk->meta_data.dictionary_page_offset); - } - if (chunk->meta_data.__isset.index_page_offset) { - min_offset = MinValue(min_offset, chunk->meta_data.index_page_offset); - } - min_offset = MinValue(min_offset, chunk->meta_data.data_page_offset); - - return min_offset; -} - -idx_t ColumnReader::GroupRowsAvailable() { - return group_rows_available; -} - -unique_ptr ColumnReader::Stats(idx_t row_group_idx_p, const vector &columns) { - if (Type().id() == LogicalTypeId::LIST || Type().id() == LogicalTypeId::STRUCT || - Type().id() == LogicalTypeId::MAP) { - return nullptr; - } - return ParquetStatisticsUtils::TransformColumnStatistics(Schema(), Type(), columns[file_idx]); -} - -void ColumnReader::Plain(shared_ptr plain_data, uint8_t *defines, idx_t num_values, // NOLINT - parquet_filter_t &filter, idx_t result_offset, Vector &result) { - throw NotImplementedException("Plain"); -} - -void ColumnReader::Dictionary(shared_ptr dictionary_data, idx_t num_entries) { // NOLINT - throw NotImplementedException("Dictionary"); -} - -void ColumnReader::Offsets(uint32_t *offsets, uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) { - throw NotImplementedException("Offsets"); -} - -void ColumnReader::PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) { - throw std::runtime_error("DELTA_LENGTH_BYTE_ARRAY encoding is only supported for text or binary data"); -} - -void ColumnReader::PrepareDeltaByteArray(ResizeableBuffer &buffer) { - throw std::runtime_error("DELTA_BYTE_ARRAY encoding is only supported for text or binary data"); -} - -void ColumnReader::DeltaByteArray(uint8_t *defines, idx_t num_values, // NOLINT - parquet_filter_t &filter, idx_t result_offset, Vector &result) { - throw NotImplementedException("DeltaByteArray"); -} - -void ColumnReader::DictReference(Vector &result) { -} -void ColumnReader::PlainReference(shared_ptr, Vector &result) { // NOLINT -} - -void ColumnReader::InitializeRead(idx_t row_group_idx_p, const vector &columns, TProtocol &protocol_p) { - D_ASSERT(file_idx < columns.size()); - chunk = &columns[file_idx]; - protocol = &protocol_p; - D_ASSERT(chunk); - D_ASSERT(chunk->__isset.meta_data); - - if (chunk->__isset.file_path) { - throw std::runtime_error("Only inlined data files are supported (no references)"); - } - - // ugh. sometimes there is an extra offset for the dict. sometimes it's wrong. - chunk_read_offset = chunk->meta_data.data_page_offset; - if (chunk->meta_data.__isset.dictionary_page_offset && chunk->meta_data.dictionary_page_offset >= 4) { - // this assumes the data pages follow the dict pages directly. - chunk_read_offset = chunk->meta_data.dictionary_page_offset; - } - group_rows_available = chunk->meta_data.num_values; -} - -void ColumnReader::PrepareRead(parquet_filter_t &filter) { - dict_decoder.reset(); - defined_decoder.reset(); - block.reset(); - PageHeader page_hdr; - page_hdr.read(protocol); - - switch (page_hdr.type) { - case PageType::DATA_PAGE_V2: - PreparePageV2(page_hdr); - PrepareDataPage(page_hdr); - break; - case PageType::DATA_PAGE: - PreparePage(page_hdr); - PrepareDataPage(page_hdr); - break; - case PageType::DICTIONARY_PAGE: - PreparePage(page_hdr); - Dictionary(std::move(block), page_hdr.dictionary_page_header.num_values); - break; - default: - break; // ignore INDEX page type and any other custom extensions - } - ResetPage(); -} - -void ColumnReader::ResetPage() { -} - -void ColumnReader::PreparePageV2(PageHeader &page_hdr) { - D_ASSERT(page_hdr.type == PageType::DATA_PAGE_V2); - - auto &trans = reinterpret_cast(*protocol->getTransport()); - - AllocateBlock(page_hdr.uncompressed_page_size + 1); - bool uncompressed = false; - if (page_hdr.data_page_header_v2.__isset.is_compressed && !page_hdr.data_page_header_v2.is_compressed) { - uncompressed = true; - } - if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) { - if (page_hdr.compressed_page_size != page_hdr.uncompressed_page_size) { - throw std::runtime_error("Page size mismatch"); - } - uncompressed = true; - } - if (uncompressed) { - trans.read(block->ptr, page_hdr.compressed_page_size); - return; - } - - // copy repeats & defines as-is because FOR SOME REASON they are uncompressed - auto uncompressed_bytes = page_hdr.data_page_header_v2.repetition_levels_byte_length + - page_hdr.data_page_header_v2.definition_levels_byte_length; - trans.read(block->ptr, uncompressed_bytes); - - auto compressed_bytes = page_hdr.compressed_page_size - uncompressed_bytes; - - AllocateCompressed(compressed_bytes); - trans.read(compressed_buffer.ptr, compressed_bytes); - - DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, compressed_bytes, block->ptr + uncompressed_bytes, - page_hdr.uncompressed_page_size - uncompressed_bytes); -} - -void ColumnReader::AllocateBlock(idx_t size) { - if (!block) { - block = make_shared(GetAllocator(), size); - } else { - block->resize(GetAllocator(), size); - } -} - -void ColumnReader::AllocateCompressed(idx_t size) { - compressed_buffer.resize(GetAllocator(), size); -} - -void ColumnReader::PreparePage(PageHeader &page_hdr) { - auto &trans = reinterpret_cast(*protocol->getTransport()); - - AllocateBlock(page_hdr.uncompressed_page_size + 1); - if (chunk->meta_data.codec == CompressionCodec::UNCOMPRESSED) { - if (page_hdr.compressed_page_size != page_hdr.uncompressed_page_size) { - throw std::runtime_error("Page size mismatch"); - } - trans.read((uint8_t *)block->ptr, page_hdr.compressed_page_size); - return; - } - - AllocateCompressed(page_hdr.compressed_page_size + 1); - trans.read((uint8_t *)compressed_buffer.ptr, page_hdr.compressed_page_size); - - DecompressInternal(chunk->meta_data.codec, compressed_buffer.ptr, page_hdr.compressed_page_size, block->ptr, - page_hdr.uncompressed_page_size); -} - -void ColumnReader::DecompressInternal(CompressionCodec::type codec, const_data_ptr_t src, idx_t src_size, - data_ptr_t dst, idx_t dst_size) { - switch (codec) { - case CompressionCodec::UNCOMPRESSED: - throw InternalException("Parquet data unexpectedly uncompressed"); - case CompressionCodec::GZIP: { - MiniZStream s; - s.Decompress(const_char_ptr_cast(src), src_size, char_ptr_cast(dst), dst_size); - break; - } - case CompressionCodec::SNAPPY: { - { - size_t uncompressed_size = 0; - auto res = duckdb_snappy::GetUncompressedLength(const_char_ptr_cast(src), src_size, &uncompressed_size); - if (!res) { - throw std::runtime_error("Snappy decompression failure"); - } - if (uncompressed_size != (size_t)dst_size) { - throw std::runtime_error("Snappy decompression failure: Uncompressed data size mismatch"); - } - } - auto res = duckdb_snappy::RawUncompress(const_char_ptr_cast(src), src_size, char_ptr_cast(dst)); - if (!res) { - throw std::runtime_error("Snappy decompression failure"); - } - break; - } - case CompressionCodec::ZSTD: { - auto res = duckdb_zstd::ZSTD_decompress(dst, dst_size, src, src_size); - if (duckdb_zstd::ZSTD_isError(res) || res != (size_t)dst_size) { - throw std::runtime_error("ZSTD Decompression failure"); - } - break; - } - default: { - std::stringstream codec_name; - codec_name << codec; - throw std::runtime_error("Unsupported compression codec \"" + codec_name.str() + - "\". Supported options are uncompressed, gzip, snappy or zstd"); - } - } -} - -void ColumnReader::PrepareDataPage(PageHeader &page_hdr) { - if (page_hdr.type == PageType::DATA_PAGE && !page_hdr.__isset.data_page_header) { - throw std::runtime_error("Missing data page header from data page"); - } - if (page_hdr.type == PageType::DATA_PAGE_V2 && !page_hdr.__isset.data_page_header_v2) { - throw std::runtime_error("Missing data page header from data page v2"); - } - - bool is_v1 = page_hdr.type == PageType::DATA_PAGE; - bool is_v2 = page_hdr.type == PageType::DATA_PAGE_V2; - auto &v1_header = page_hdr.data_page_header; - auto &v2_header = page_hdr.data_page_header_v2; - - page_rows_available = is_v1 ? v1_header.num_values : v2_header.num_values; - auto page_encoding = is_v1 ? v1_header.encoding : v2_header.encoding; - - if (HasRepeats()) { - uint32_t rep_length = is_v1 ? block->read() : v2_header.repetition_levels_byte_length; - block->available(rep_length); - repeated_decoder = make_uniq(block->ptr, rep_length, RleBpDecoder::ComputeBitWidth(max_repeat)); - block->inc(rep_length); - } else if (is_v2 && v2_header.repetition_levels_byte_length > 0) { - block->inc(v2_header.repetition_levels_byte_length); - } - - if (HasDefines()) { - uint32_t def_length = is_v1 ? block->read() : v2_header.definition_levels_byte_length; - block->available(def_length); - defined_decoder = make_uniq(block->ptr, def_length, RleBpDecoder::ComputeBitWidth(max_define)); - block->inc(def_length); - } else if (is_v2 && v2_header.definition_levels_byte_length > 0) { - block->inc(v2_header.definition_levels_byte_length); - } - - switch (page_encoding) { - case Encoding::RLE_DICTIONARY: - case Encoding::PLAIN_DICTIONARY: { - // where is it otherwise?? - auto dict_width = block->read(); - // TODO somehow dict_width can be 0 ? - dict_decoder = make_uniq(block->ptr, block->len, dict_width); - block->inc(block->len); - break; - } - case Encoding::RLE: { - if (type.id() != LogicalTypeId::BOOLEAN) { - throw std::runtime_error("RLE encoding is only supported for boolean data"); - } - block->inc(sizeof(uint32_t)); - rle_decoder = make_uniq(block->ptr, block->len, 1); - break; - } - case Encoding::DELTA_BINARY_PACKED: { - dbp_decoder = make_uniq(block->ptr, block->len); - block->inc(block->len); - break; - } - case Encoding::DELTA_LENGTH_BYTE_ARRAY: { - PrepareDeltaLengthByteArray(*block); - break; - } - case Encoding::DELTA_BYTE_ARRAY: { - PrepareDeltaByteArray(*block); - break; - } - case Encoding::PLAIN: - // nothing to do here, will be read directly below - break; - - default: - throw std::runtime_error("Unsupported page encoding"); - } -} - -idx_t ColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out, - Vector &result) { - // we need to reset the location because multiple column readers share the same protocol - auto &trans = reinterpret_cast(*protocol->getTransport()); - trans.SetLocation(chunk_read_offset); - - // Perform any skips that were not applied yet. - if (pending_skips > 0) { - ApplyPendingSkips(pending_skips); - } - - idx_t result_offset = 0; - auto to_read = num_values; - - while (to_read > 0) { - while (page_rows_available == 0) { - PrepareRead(filter); - } - - D_ASSERT(block); - auto read_now = MinValue(to_read, page_rows_available); - - D_ASSERT(read_now <= STANDARD_VECTOR_SIZE); - - if (HasRepeats()) { - D_ASSERT(repeated_decoder); - repeated_decoder->GetBatch(repeat_out + result_offset, read_now); - } - - if (HasDefines()) { - D_ASSERT(defined_decoder); - defined_decoder->GetBatch(define_out + result_offset, read_now); - } - - idx_t null_count = 0; - - if ((dict_decoder || dbp_decoder || rle_decoder) && HasDefines()) { - // we need the null count because the dictionary offsets have no entries for nulls - for (idx_t i = 0; i < read_now; i++) { - if (define_out[i + result_offset] != max_define) { - null_count++; - } - } - } - - if (dict_decoder) { - offset_buffer.resize(reader.allocator, sizeof(uint32_t) * (read_now - null_count)); - dict_decoder->GetBatch(offset_buffer.ptr, read_now - null_count); - DictReference(result); - Offsets(reinterpret_cast(offset_buffer.ptr), define_out, read_now, filter, result_offset, - result); - } else if (dbp_decoder) { - // TODO keep this in the state - auto read_buf = make_shared(); - - switch (schema.type) { - case duckdb_parquet::format::Type::INT32: - read_buf->resize(reader.allocator, sizeof(int32_t) * (read_now - null_count)); - dbp_decoder->GetBatch(read_buf->ptr, read_now - null_count); - - break; - case duckdb_parquet::format::Type::INT64: - read_buf->resize(reader.allocator, sizeof(int64_t) * (read_now - null_count)); - dbp_decoder->GetBatch(read_buf->ptr, read_now - null_count); - break; - - default: - throw std::runtime_error("DELTA_BINARY_PACKED should only be INT32 or INT64"); - } - // Plain() will put NULLs in the right place - Plain(read_buf, define_out, read_now, filter, result_offset, result); - } else if (rle_decoder) { - // RLE encoding for boolean - D_ASSERT(type.id() == LogicalTypeId::BOOLEAN); - auto read_buf = make_shared(); - read_buf->resize(reader.allocator, sizeof(bool) * (read_now - null_count)); - rle_decoder->GetBatch(read_buf->ptr, read_now - null_count); - PlainTemplated>(read_buf, define_out, read_now, filter, - result_offset, result); - } else if (byte_array_data) { - // DELTA_BYTE_ARRAY or DELTA_LENGTH_BYTE_ARRAY - DeltaByteArray(define_out, read_now, filter, result_offset, result); - } else { - PlainReference(block, result); - Plain(block, define_out, read_now, filter, result_offset, result); - } - - result_offset += read_now; - page_rows_available -= read_now; - to_read -= read_now; - } - group_rows_available -= num_values; - chunk_read_offset = trans.GetLocation(); - - return num_values; -} - -void ColumnReader::Skip(idx_t num_values) { - pending_skips += num_values; -} - -void ColumnReader::ApplyPendingSkips(idx_t num_values) { - pending_skips -= num_values; - - dummy_define.zero(); - dummy_repeat.zero(); - - // TODO this can be optimized, for example we dont actually have to bitunpack offsets - Vector dummy_result(type, nullptr); - - idx_t remaining = num_values; - idx_t read = 0; - - while (remaining) { - idx_t to_read = MinValue(remaining, STANDARD_VECTOR_SIZE); - read += Read(to_read, none_filter, dummy_define.ptr, dummy_repeat.ptr, dummy_result); - remaining -= to_read; - } - - if (read != num_values) { - throw std::runtime_error("Row count mismatch when skipping rows"); - } -} - -//===--------------------------------------------------------------------===// -// String Column Reader -//===--------------------------------------------------------------------===// -StringColumnReader::StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, - max_define_p, max_repeat_p) { - fixed_width_string_length = 0; - if (schema_p.type == Type::FIXED_LEN_BYTE_ARRAY) { - D_ASSERT(schema_p.__isset.type_length); - fixed_width_string_length = schema_p.type_length; - } -} - -uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) { - if (!is_varchar) { - return str_len; - } - // verify if a string is actually UTF8, and if there are no null bytes in the middle of the string - // technically Parquet should guarantee this, but reality is often disappointing - UnicodeInvalidReason reason; - size_t pos; - auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos); - if (utf_type == UnicodeType::INVALID) { - throw InvalidInputException("Invalid string encoding found in Parquet file: value \"" + - Blob::ToString(string_t(str_data, str_len)) + "\" is not valid UTF8!"); - } - return str_len; -} - -uint32_t StringColumnReader::VerifyString(const char *str_data, uint32_t str_len) { - return VerifyString(str_data, str_len, Type() == LogicalTypeId::VARCHAR); -} - -void StringColumnReader::Dictionary(shared_ptr data, idx_t num_entries) { - dict = std::move(data); - dict_strings = unique_ptr(new string_t[num_entries]); - for (idx_t dict_idx = 0; dict_idx < num_entries; dict_idx++) { - uint32_t str_len; - if (fixed_width_string_length == 0) { - // variable length string: read from dictionary - str_len = dict->read(); - } else { - // fixed length string - str_len = fixed_width_string_length; - } - dict->available(str_len); - - auto dict_str = reinterpret_cast(dict->ptr); - auto actual_str_len = VerifyString(dict_str, str_len); - dict_strings[dict_idx] = string_t(dict_str, actual_str_len); - dict->inc(str_len); - } -} - -static shared_ptr ReadDbpData(Allocator &allocator, ResizeableBuffer &buffer, idx_t &value_count) { - auto decoder = make_uniq(buffer.ptr, buffer.len); - value_count = decoder->TotalValues(); - auto result = make_shared(); - result->resize(allocator, sizeof(uint32_t) * value_count); - decoder->GetBatch(result->ptr, value_count); - decoder->Finalize(); - buffer.inc(buffer.len - decoder->BufferPtr().len); - return result; -} - -void StringColumnReader::PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) { - idx_t value_count; - auto length_buffer = ReadDbpData(reader.allocator, buffer, value_count); - if (value_count == 0) { - // no values - byte_array_data = make_uniq(LogicalType::VARCHAR, nullptr); - return; - } - auto length_data = reinterpret_cast(length_buffer->ptr); - byte_array_data = make_uniq(LogicalType::VARCHAR, value_count); - byte_array_count = value_count; - delta_offset = 0; - auto string_data = FlatVector::GetData(*byte_array_data); - for (idx_t i = 0; i < value_count; i++) { - auto str_len = length_data[i]; - string_data[i] = StringVector::EmptyString(*byte_array_data, str_len); - auto result_data = string_data[i].GetDataWriteable(); - memcpy(result_data, buffer.ptr, length_data[i]); - buffer.inc(length_data[i]); - string_data[i].Finalize(); - } -} - -void StringColumnReader::PrepareDeltaByteArray(ResizeableBuffer &buffer) { - idx_t prefix_count, suffix_count; - auto prefix_buffer = ReadDbpData(reader.allocator, buffer, prefix_count); - auto suffix_buffer = ReadDbpData(reader.allocator, buffer, suffix_count); - if (prefix_count != suffix_count) { - throw std::runtime_error("DELTA_BYTE_ARRAY - prefix and suffix counts are different - corrupt file?"); - } - if (prefix_count == 0) { - // no values - byte_array_data = make_uniq(LogicalType::VARCHAR, nullptr); - return; - } - auto prefix_data = reinterpret_cast(prefix_buffer->ptr); - auto suffix_data = reinterpret_cast(suffix_buffer->ptr); - byte_array_data = make_uniq(LogicalType::VARCHAR, prefix_count); - byte_array_count = prefix_count; - delta_offset = 0; - auto string_data = FlatVector::GetData(*byte_array_data); - for (idx_t i = 0; i < prefix_count; i++) { - auto str_len = prefix_data[i] + suffix_data[i]; - string_data[i] = StringVector::EmptyString(*byte_array_data, str_len); - auto result_data = string_data[i].GetDataWriteable(); - if (prefix_data[i] > 0) { - if (i == 0 || prefix_data[i] > string_data[i - 1].GetSize()) { - throw std::runtime_error("DELTA_BYTE_ARRAY - prefix is out of range - corrupt file?"); - } - memcpy(result_data, string_data[i - 1].GetData(), prefix_data[i]); - } - memcpy(result_data + prefix_data[i], buffer.ptr, suffix_data[i]); - buffer.inc(suffix_data[i]); - string_data[i].Finalize(); - } -} - -void StringColumnReader::DeltaByteArray(uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) { - if (!byte_array_data) { - throw std::runtime_error("Internal error - DeltaByteArray called but there was no byte_array_data set"); - } - auto result_ptr = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - auto string_data = FlatVector::GetData(*byte_array_data); - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - result_mask.SetInvalid(row_idx + result_offset); - continue; - } - if (filter[row_idx + result_offset]) { - if (delta_offset >= byte_array_count) { - throw IOException("DELTA_BYTE_ARRAY - length mismatch between values and byte array lengths (attempted " - "read of %d from %d entries) - corrupt file?", - delta_offset + 1, byte_array_count); - } - result_ptr[row_idx + result_offset] = string_data[delta_offset++]; - } else { - delta_offset++; - } - } - StringVector::AddHeapReference(result, *byte_array_data); -} - -class ParquetStringVectorBuffer : public VectorBuffer { -public: - explicit ParquetStringVectorBuffer(shared_ptr buffer_p) - : VectorBuffer(VectorBufferType::OPAQUE_BUFFER), buffer(std::move(buffer_p)) { - } - -private: - shared_ptr buffer; -}; - -void StringColumnReader::DictReference(Vector &result) { - StringVector::AddBuffer(result, make_buffer(dict)); -} -void StringColumnReader::PlainReference(shared_ptr plain_data, Vector &result) { - StringVector::AddBuffer(result, make_buffer(std::move(plain_data))); -} - -string_t StringParquetValueConversion::DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto &dict_strings = reader.Cast().dict_strings; - return dict_strings[offset]; -} - -string_t StringParquetValueConversion::PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - auto &scr = reader.Cast(); - uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; - plain_data.available(str_len); - auto plain_str = char_ptr_cast(plain_data.ptr); - auto actual_str_len = reader.Cast().VerifyString(plain_str, str_len); - auto ret_str = string_t(plain_str, actual_str_len); - plain_data.inc(str_len); - return ret_str; -} - -void StringParquetValueConversion::PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - auto &scr = reader.Cast(); - uint32_t str_len = scr.fixed_width_string_length == 0 ? plain_data.read() : scr.fixed_width_string_length; - plain_data.inc(str_len); -} - -//===--------------------------------------------------------------------===// -// List Column Reader -//===--------------------------------------------------------------------===// -idx_t ListColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, - data_ptr_t repeat_out, Vector &result_out) { - idx_t result_offset = 0; - auto result_ptr = FlatVector::GetData(result_out); - auto &result_mask = FlatVector::Validity(result_out); - - if (pending_skips > 0) { - ApplyPendingSkips(pending_skips); - } - - D_ASSERT(ListVector::GetListSize(result_out) == 0); - // if an individual list is longer than STANDARD_VECTOR_SIZE we actually have to loop the child read to fill it - bool finished = false; - while (!finished) { - idx_t child_actual_num_values = 0; - - // check if we have any overflow from a previous read - if (overflow_child_count == 0) { - // we don't: read elements from the child reader - child_defines.zero(); - child_repeats.zero(); - // we don't know in advance how many values to read because of the beautiful repetition/definition setup - // we just read (up to) a vector from the child column, and see if we have read enough - // if we have not read enough, we read another vector - // if we have read enough, we leave any unhandled elements in the overflow vector for a subsequent read - auto child_req_num_values = - MinValue(STANDARD_VECTOR_SIZE, child_column_reader->GroupRowsAvailable()); - read_vector.ResetFromCache(read_cache); - child_actual_num_values = child_column_reader->Read(child_req_num_values, child_filter, child_defines_ptr, - child_repeats_ptr, read_vector); - } else { - // we do: use the overflow values - child_actual_num_values = overflow_child_count; - overflow_child_count = 0; - } - - if (child_actual_num_values == 0) { - // no more elements available: we are done - break; - } - read_vector.Verify(child_actual_num_values); - idx_t current_chunk_offset = ListVector::GetListSize(result_out); - - // hard-won piece of code this, modify at your own risk - // the intuition is that we have to only collapse values into lists that are repeated *on this level* - // the rest is pretty much handed up as-is as a single-valued list or NULL - idx_t child_idx; - for (child_idx = 0; child_idx < child_actual_num_values; child_idx++) { - if (child_repeats_ptr[child_idx] == max_repeat) { - // value repeats on this level, append - D_ASSERT(result_offset > 0); - result_ptr[result_offset - 1].length++; - continue; - } - - if (result_offset >= num_values) { - // we ran out of output space - finished = true; - break; - } - if (child_defines_ptr[child_idx] >= max_define) { - // value has been defined down the stack, hence its NOT NULL - result_ptr[result_offset].offset = child_idx + current_chunk_offset; - result_ptr[result_offset].length = 1; - } else if (child_defines_ptr[child_idx] == max_define - 1) { - // empty list - result_ptr[result_offset].offset = child_idx + current_chunk_offset; - result_ptr[result_offset].length = 0; - } else { - // value is NULL somewhere up the stack - result_mask.SetInvalid(result_offset); - result_ptr[result_offset].offset = 0; - result_ptr[result_offset].length = 0; - } - - repeat_out[result_offset] = child_repeats_ptr[child_idx]; - define_out[result_offset] = child_defines_ptr[child_idx]; - - result_offset++; - } - // actually append the required elements to the child list - ListVector::Append(result_out, read_vector, child_idx); - - // we have read more values from the child reader than we can fit into the result for this read - // we have to pass everything from child_idx to child_actual_num_values into the next call - if (child_idx < child_actual_num_values && result_offset == num_values) { - read_vector.Slice(read_vector, child_idx, child_actual_num_values); - overflow_child_count = child_actual_num_values - child_idx; - read_vector.Verify(overflow_child_count); - - // move values in the child repeats and defines *backward* by child_idx - for (idx_t repdef_idx = 0; repdef_idx < overflow_child_count; repdef_idx++) { - child_defines_ptr[repdef_idx] = child_defines_ptr[child_idx + repdef_idx]; - child_repeats_ptr[repdef_idx] = child_repeats_ptr[child_idx + repdef_idx]; - } - } - } - result_out.Verify(result_offset); - return result_offset; -} - -ListColumnReader::ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p, - unique_ptr child_column_reader_p) - : ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p), - child_column_reader(std::move(child_column_reader_p)), - read_cache(reader.allocator, ListType::GetChildType(Type())), read_vector(read_cache), overflow_child_count(0) { - - child_defines.resize(reader.allocator, STANDARD_VECTOR_SIZE); - child_repeats.resize(reader.allocator, STANDARD_VECTOR_SIZE); - child_defines_ptr = (uint8_t *)child_defines.ptr; - child_repeats_ptr = (uint8_t *)child_repeats.ptr; - - child_filter.set(); -} - -void ListColumnReader::ApplyPendingSkips(idx_t num_values) { - pending_skips -= num_values; - - auto define_out = unique_ptr(new uint8_t[num_values]); - auto repeat_out = unique_ptr(new uint8_t[num_values]); - - idx_t remaining = num_values; - idx_t read = 0; - - while (remaining) { - Vector result_out(Type()); - parquet_filter_t filter; - idx_t to_read = MinValue(remaining, STANDARD_VECTOR_SIZE); - read += Read(to_read, filter, define_out.get(), repeat_out.get(), result_out); - remaining -= to_read; - } - - if (read != num_values) { - throw InternalException("Not all skips done!"); - } -} - -//===--------------------------------------------------------------------===// -// Row NumberColumn Reader -//===--------------------------------------------------------------------===// -RowNumberColumnReader::RowNumberColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p) - : ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p) { -} - -unique_ptr RowNumberColumnReader::Stats(idx_t row_group_idx_p, const vector &columns) { - auto stats = NumericStats::CreateUnknown(type); - auto &row_groups = reader.GetFileMetadata()->row_groups; - D_ASSERT(row_group_idx_p < row_groups.size()); - idx_t row_group_offset_min = 0; - for (idx_t i = 0; i < row_group_idx_p; i++) { - row_group_offset_min += row_groups[i].num_rows; - } - - NumericStats::SetMin(stats, Value::BIGINT(row_group_offset_min)); - NumericStats::SetMax(stats, Value::BIGINT(row_group_offset_min + row_groups[row_group_idx_p].num_rows)); - stats.Set(StatsInfo::CANNOT_HAVE_NULL_VALUES); - return stats.ToUnique(); -} - -void RowNumberColumnReader::InitializeRead(idx_t row_group_idx_p, const vector &columns, - TProtocol &protocol_p) { - row_group_offset = 0; - auto &row_groups = reader.GetFileMetadata()->row_groups; - for (idx_t i = 0; i < row_group_idx_p; i++) { - row_group_offset += row_groups[i].num_rows; - } -} - -idx_t RowNumberColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, - data_ptr_t repeat_out, Vector &result) { - - auto data_ptr = FlatVector::GetData(result); - for (idx_t i = 0; i < num_values; i++) { - data_ptr[i] = row_group_offset++; - } - return num_values; -} - -//===--------------------------------------------------------------------===// -// Cast Column Reader -//===--------------------------------------------------------------------===// -CastColumnReader::CastColumnReader(unique_ptr child_reader_p, LogicalType target_type_p) - : ColumnReader(child_reader_p->Reader(), std::move(target_type_p), child_reader_p->Schema(), - child_reader_p->FileIdx(), child_reader_p->MaxDefine(), child_reader_p->MaxRepeat()), - child_reader(std::move(child_reader_p)) { - vector intermediate_types {child_reader->Type()}; - intermediate_chunk.Initialize(reader.allocator, intermediate_types); -} - -unique_ptr CastColumnReader::Stats(idx_t row_group_idx_p, const vector &columns) { - // casting stats is not supported (yet) - return nullptr; -} - -void CastColumnReader::InitializeRead(idx_t row_group_idx_p, const vector &columns, - TProtocol &protocol_p) { - child_reader->InitializeRead(row_group_idx_p, columns, protocol_p); -} - -idx_t CastColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, - data_ptr_t repeat_out, Vector &result) { - intermediate_chunk.Reset(); - auto &intermediate_vector = intermediate_chunk.data[0]; - - auto amount = child_reader->Read(num_values, filter, define_out, repeat_out, intermediate_vector); - if (!filter.all()) { - // work-around for filters: set all values that are filtered to NULL to prevent the cast from failing on - // uninitialized data - intermediate_vector.Flatten(amount); - auto &validity = FlatVector::Validity(intermediate_vector); - for (idx_t i = 0; i < amount; i++) { - if (!filter[i]) { - validity.SetInvalid(i); - } - } - } - VectorOperations::DefaultCast(intermediate_vector, result, amount); - return amount; -} - -void CastColumnReader::Skip(idx_t num_values) { - child_reader->Skip(num_values); -} - -idx_t CastColumnReader::GroupRowsAvailable() { - return child_reader->GroupRowsAvailable(); -} - -//===--------------------------------------------------------------------===// -// Struct Column Reader -//===--------------------------------------------------------------------===// -StructColumnReader::StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, - idx_t schema_idx_p, idx_t max_define_p, idx_t max_repeat_p, - vector> child_readers_p) - : ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p), - child_readers(std::move(child_readers_p)) { - D_ASSERT(type.InternalType() == PhysicalType::STRUCT); -} - -ColumnReader *StructColumnReader::GetChildReader(idx_t child_idx) { - D_ASSERT(child_idx < child_readers.size()); - return child_readers[child_idx].get(); -} - -void StructColumnReader::InitializeRead(idx_t row_group_idx_p, const vector &columns, - TProtocol &protocol_p) { - for (auto &child : child_readers) { - child->InitializeRead(row_group_idx_p, columns, protocol_p); - } -} - -idx_t StructColumnReader::Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, - data_ptr_t repeat_out, Vector &result) { - auto &struct_entries = StructVector::GetEntries(result); - D_ASSERT(StructType::GetChildTypes(Type()).size() == struct_entries.size()); - - if (pending_skips > 0) { - ApplyPendingSkips(pending_skips); - } - - idx_t read_count = num_values; - for (idx_t i = 0; i < struct_entries.size(); i++) { - auto child_num_values = child_readers[i]->Read(num_values, filter, define_out, repeat_out, *struct_entries[i]); - if (i == 0) { - read_count = child_num_values; - } else if (read_count != child_num_values) { - throw std::runtime_error("Struct child row count mismatch"); - } - } - // set the validity mask for this level - auto &validity = FlatVector::Validity(result); - for (idx_t i = 0; i < read_count; i++) { - if (define_out[i] < max_define) { - validity.SetInvalid(i); - } - } - - return read_count; -} - -void StructColumnReader::Skip(idx_t num_values) { - for (auto &child_reader : child_readers) { - child_reader->Skip(num_values); - } -} - -void StructColumnReader::RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) { - for (auto &child : child_readers) { - child->RegisterPrefetch(transport, allow_merge); - } -} - -uint64_t StructColumnReader::TotalCompressedSize() { - uint64_t size = 0; - for (auto &child : child_readers) { - size += child->TotalCompressedSize(); - } - return size; -} - -static bool TypeHasExactRowCount(const LogicalType &type) { - switch (type.id()) { - case LogicalTypeId::LIST: - case LogicalTypeId::MAP: - return false; - case LogicalTypeId::STRUCT: - for (auto &kv : StructType::GetChildTypes(type)) { - if (TypeHasExactRowCount(kv.second)) { - return true; - } - } - return false; - default: - return true; - } -} - -idx_t StructColumnReader::GroupRowsAvailable() { - for (idx_t i = 0; i < child_readers.size(); i++) { - if (TypeHasExactRowCount(child_readers[i]->Type())) { - return child_readers[i]->GroupRowsAvailable(); - } - } - return child_readers[0]->GroupRowsAvailable(); -} - -//===--------------------------------------------------------------------===// -// Decimal Column Reader -//===--------------------------------------------------------------------===// -template -struct DecimalParquetValueConversion { - static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = reinterpret_cast(dict.ptr); - return dict_ptr[offset]; - } - - static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - idx_t byte_len; - if (FIXED_LENGTH) { - byte_len = (idx_t)reader.Schema().type_length; /* sure, type length needs to be a signed int */ - } else { - byte_len = plain_data.read(); - } - plain_data.available(byte_len); - auto res = - ParquetDecimalUtils::ReadDecimalValue(const_data_ptr_cast(plain_data.ptr), byte_len); - - plain_data.inc(byte_len); - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - uint32_t decimal_len = FIXED_LENGTH ? reader.Schema().type_length : plain_data.read(); - plain_data.inc(decimal_len); - } -}; - -template -class DecimalColumnReader - : public TemplatedColumnReader> { - using BaseType = - TemplatedColumnReader>; - -public: - DecimalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, // NOLINT - idx_t file_idx_p, idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader>( - reader, std::move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { // NOLINT - BaseType::AllocateDict(num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = - DecimalParquetValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -template -static unique_ptr CreateDecimalReaderInternal(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define, idx_t max_repeat) { - switch (type_p.InternalType()) { - case PhysicalType::INT16: - return make_uniq>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - case PhysicalType::INT32: - return make_uniq>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - case PhysicalType::INT64: - return make_uniq>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - case PhysicalType::INT128: - return make_uniq>(reader, type_p, schema_p, file_idx_p, max_define, - max_repeat); - default: - throw InternalException("Unrecognized type for Decimal"); - } -} - -unique_ptr ParquetDecimalUtils::CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define, idx_t max_repeat) { - if (schema_p.__isset.type_length) { - return CreateDecimalReaderInternal(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } else { - return CreateDecimalReaderInternal(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } -} - -//===--------------------------------------------------------------------===// -// UUID Column Reader -//===--------------------------------------------------------------------===// -struct UUIDValueConversion { - static hugeint_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = reinterpret_cast(dict.ptr); - return dict_ptr[offset]; - } - - static hugeint_t ReadParquetUUID(const_data_ptr_t input) { - hugeint_t result; - result.lower = 0; - uint64_t unsigned_upper = 0; - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - unsigned_upper <<= 8; - unsigned_upper += input[i]; - } - for (idx_t i = sizeof(uint64_t); i < sizeof(hugeint_t); i++) { - result.lower <<= 8; - result.lower += input[i]; - } - result.upper = unsigned_upper; - result.upper ^= (int64_t(1) << 63); - return result; - } - - static hugeint_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - idx_t byte_len = sizeof(hugeint_t); - plain_data.available(byte_len); - auto res = ReadParquetUUID(const_data_ptr_cast(plain_data.ptr)); - - plain_data.inc(byte_len); - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(hugeint_t)); - } -}; - -class UUIDColumnReader : public TemplatedColumnReader { - -public: - UUIDColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, std::move(type_p), schema_p, file_idx_p, - max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { // NOLINT - AllocateDict(num_entries * sizeof(hugeint_t)); - auto dict_ptr = reinterpret_cast(this->dict->ptr); - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = UUIDValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -//===--------------------------------------------------------------------===// -// Interval Column Reader -//===--------------------------------------------------------------------===// -struct IntervalValueConversion { - static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12; - - static interval_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - auto dict_ptr = reinterpret_cast(dict.ptr); - return dict_ptr[offset]; - } - - static interval_t ReadParquetInterval(const_data_ptr_t input) { - interval_t result; - result.months = Load(input); - result.days = Load(input + sizeof(uint32_t)); - result.micros = int64_t(Load(input + sizeof(uint32_t) * 2)) * 1000; - return result; - } - - static interval_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - idx_t byte_len = PARQUET_INTERVAL_SIZE; - plain_data.available(byte_len); - auto res = ReadParquetInterval(const_data_ptr_cast(plain_data.ptr)); - - plain_data.inc(byte_len); - return res; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(PARQUET_INTERVAL_SIZE); - } -}; - -class IntervalColumnReader : public TemplatedColumnReader { - -public: - IntervalColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, std::move(type_p), schema_p, file_idx_p, - max_define_p, max_repeat_p) {}; - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) override { // NOLINT - AllocateDict(num_entries * sizeof(interval_t)); - auto dict_ptr = reinterpret_cast(this->dict->ptr); - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = IntervalValueConversion::PlainRead(*dictionary_data, *this); - } - } -}; - -//===--------------------------------------------------------------------===// -// Create Column Reader -//===--------------------------------------------------------------------===// -template -unique_ptr CreateDecimalReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, - idx_t max_repeat) { - switch (type_p.InternalType()) { - case PhysicalType::INT16: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT32: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case PhysicalType::INT64: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - throw NotImplementedException("Unimplemented internal type for CreateDecimalReader"); - } -} - -unique_ptr ColumnReader::CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, - idx_t max_repeat) { - switch (type_p.id()) { - case LogicalTypeId::BOOLEAN: - return make_uniq(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UTINYINT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::USMALLINT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UINTEGER: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::UBIGINT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::TINYINT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::SMALLINT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::INTEGER: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::BIGINT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::FLOAT: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::DOUBLE: - return make_uniq>>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_TZ: - switch (schema_p.type) { - case Type::INT96: - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case Type::INT64: - if (schema_p.__isset.logicalType && schema_p.logicalType.__isset.TIMESTAMP) { - if (schema_p.logicalType.TIMESTAMP.unit.__isset.MILLIS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } else if (schema_p.logicalType.TIMESTAMP.unit.__isset.MICROS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } else if (schema_p.logicalType.TIMESTAMP.unit.__isset.NANOS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } - } else if (schema_p.__isset.converted_type) { - switch (schema_p.converted_type) { - case ConvertedType::TIMESTAMP_MICROS: - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case ConvertedType::TIMESTAMP_MILLIS: - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - } - default: - break; - } - break; - case LogicalTypeId::DATE: - return make_uniq>(reader, type_p, schema_p, file_idx_p, - max_define, max_repeat); - case LogicalTypeId::TIME: - if (schema_p.__isset.logicalType && schema_p.logicalType.__isset.TIME) { - if (schema_p.logicalType.TIME.unit.__isset.MILLIS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } else if (schema_p.logicalType.TIME.unit.__isset.MICROS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } else if (schema_p.logicalType.TIME.unit.__isset.NANOS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } - } else if (schema_p.__isset.converted_type) { - switch (schema_p.converted_type) { - case ConvertedType::TIME_MICROS: - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case ConvertedType::TIME_MILLIS: - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - } - case LogicalTypeId::TIME_TZ: - if (schema_p.__isset.logicalType && schema_p.logicalType.__isset.TIME) { - if (schema_p.logicalType.TIME.unit.__isset.MICROS) { - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - } - } else if (schema_p.__isset.converted_type) { - switch (schema_p.converted_type) { - case ConvertedType::TIME_MICROS: - return make_uniq>( - reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - } - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - return make_uniq(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::DECIMAL: - // we have to figure out what kind of int we need - switch (schema_p.type) { - case Type::INT32: - return CreateDecimalReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case Type::INT64: - return CreateDecimalReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - return ParquetDecimalUtils::CreateReader(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - throw NotImplementedException("Unrecognized Parquet type for Decimal"); - } - break; - case LogicalTypeId::UUID: - return make_uniq(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - case LogicalTypeId::INTERVAL: - return make_uniq(reader, type_p, schema_p, file_idx_p, max_define, max_repeat); - default: - break; - } - throw NotImplementedException(type_p.ToString()); -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/column_writer.cpp b/src/duckdb/extension/parquet/column_writer.cpp deleted file mode 100644 index 2832117c9..000000000 --- a/src/duckdb/extension/parquet/column_writer.cpp +++ /dev/null @@ -1,2043 +0,0 @@ -#include "column_writer.hpp" - -#include "duckdb.hpp" -#include "parquet_rle_bp_decoder.hpp" -#include "parquet_rle_bp_encoder.hpp" -#include "parquet_writer.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/common.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/common/mutex.hpp" -#include "duckdb/common/operator/comparison_operators.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#include "duckdb/common/serializer/buffered_serializer.hpp" -#include "duckdb/common/string_map_set.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/types/hugeint.hpp" -#include "duckdb/common/types/string_heap.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/timestamp.hpp" -#endif - -#include "miniz_wrapper.hpp" -#include "snappy.h" -#include "zstd.h" - -namespace duckdb { - -using namespace duckdb_parquet; // NOLINT -using namespace duckdb_miniz; // NOLINT - -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::PageType; -using ParquetRowGroup = duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::Type; - -#define PARQUET_DEFINE_VALID 65535 - -static void VarintEncode(uint32_t val, Serializer &ser) { - do { - uint8_t byte = val & 127; - val >>= 7; - if (val != 0) { - byte |= 128; - } - ser.Write(byte); - } while (val != 0); -} - -static uint8_t GetVarintSize(uint32_t val) { - uint8_t res = 0; - do { - val >>= 7; - res++; - } while (val != 0); - return res; -} - -//===--------------------------------------------------------------------===// -// ColumnWriterStatistics -//===--------------------------------------------------------------------===// -ColumnWriterStatistics::~ColumnWriterStatistics() { -} - -string ColumnWriterStatistics::GetMin() { - return string(); -} - -string ColumnWriterStatistics::GetMax() { - return string(); -} - -string ColumnWriterStatistics::GetMinValue() { - return string(); -} - -string ColumnWriterStatistics::GetMaxValue() { - return string(); -} - -//===--------------------------------------------------------------------===// -// RleBpEncoder -//===--------------------------------------------------------------------===// -RleBpEncoder::RleBpEncoder(uint32_t bit_width) - : byte_width((bit_width + 7) / 8), byte_count(idx_t(-1)), run_count(idx_t(-1)) { -} - -// we always RLE everything (for now) -void RleBpEncoder::BeginPrepare(uint32_t first_value) { - byte_count = 0; - run_count = 1; - current_run_count = 1; - last_value = first_value; -} - -void RleBpEncoder::FinishRun() { - // last value, or value has changed - // write out the current run - byte_count += GetVarintSize(current_run_count << 1) + byte_width; - current_run_count = 1; - run_count++; -} - -void RleBpEncoder::PrepareValue(uint32_t value) { - if (value != last_value) { - FinishRun(); - last_value = value; - } else { - current_run_count++; - } -} - -void RleBpEncoder::FinishPrepare() { - FinishRun(); -} - -idx_t RleBpEncoder::GetByteCount() { - D_ASSERT(byte_count != idx_t(-1)); - return byte_count; -} - -void RleBpEncoder::BeginWrite(Serializer &writer, uint32_t first_value) { - // start the RLE runs - last_value = first_value; - current_run_count = 1; -} - -void RleBpEncoder::WriteRun(Serializer &writer) { - // write the header of the run - VarintEncode(current_run_count << 1, writer); - // now write the value - D_ASSERT(last_value >> (byte_width * 8) == 0); - switch (byte_width) { - case 1: - writer.Write(last_value); - break; - case 2: - writer.Write(last_value); - break; - case 3: - writer.Write(last_value & 0xFF); - writer.Write((last_value >> 8) & 0xFF); - writer.Write((last_value >> 16) & 0xFF); - break; - case 4: - writer.Write(last_value); - break; - default: - throw InternalException("unsupported byte width for RLE encoding"); - } - current_run_count = 1; -} - -void RleBpEncoder::WriteValue(Serializer &writer, uint32_t value) { - if (value != last_value) { - WriteRun(writer); - last_value = value; - } else { - current_run_count++; - } -} - -void RleBpEncoder::FinishWrite(Serializer &writer) { - WriteRun(writer); -} - -//===--------------------------------------------------------------------===// -// ColumnWriter -//===--------------------------------------------------------------------===// -ColumnWriter::ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : writer(writer), schema_idx(schema_idx), schema_path(std::move(schema_path_p)), max_repeat(max_repeat), - max_define(max_define), can_have_nulls(can_have_nulls), null_count(0) { -} -ColumnWriter::~ColumnWriter() { -} - -ColumnWriterState::~ColumnWriterState() { -} - -void ColumnWriter::CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, - unique_ptr &compressed_buf) { - switch (writer.GetCodec()) { - case CompressionCodec::UNCOMPRESSED: - compressed_size = temp_writer.blob.size; - compressed_data = temp_writer.blob.data.get(); - break; - case CompressionCodec::SNAPPY: { - compressed_size = duckdb_snappy::MaxCompressedLength(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - duckdb_snappy::RawCompress(const_char_ptr_cast(temp_writer.blob.data.get()), temp_writer.blob.size, - char_ptr_cast(compressed_buf.get()), &compressed_size); - compressed_data = compressed_buf.get(); - D_ASSERT(compressed_size <= duckdb_snappy::MaxCompressedLength(temp_writer.blob.size)); - break; - } - case CompressionCodec::GZIP: { - MiniZStream s; - compressed_size = s.MaxCompressedLength(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - s.Compress(const_char_ptr_cast(temp_writer.blob.data.get()), temp_writer.blob.size, - char_ptr_cast(compressed_buf.get()), &compressed_size); - compressed_data = compressed_buf.get(); - break; - } - case CompressionCodec::ZSTD: { - compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.blob.size); - compressed_buf = unique_ptr(new data_t[compressed_size]); - compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size, - (const void *)temp_writer.blob.data.get(), temp_writer.blob.size, - ZSTD_CLEVEL_DEFAULT); - compressed_data = compressed_buf.get(); - break; - } - default: - throw InternalException("Unsupported codec for Parquet Writer"); - } - - if (compressed_size > idx_t(NumericLimits::Maximum())) { - throw InternalException("Parquet writer: %d compressed page size out of range for type integer", - temp_writer.blob.size); - } -} - -void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterState *parent, idx_t count, - idx_t max_repeat) { - if (!parent) { - // no repeat levels without a parent node - return; - } - while (state.repetition_levels.size() < parent->repetition_levels.size()) { - state.repetition_levels.push_back(parent->repetition_levels[state.repetition_levels.size()]); - } -} - -void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, - idx_t count, uint16_t define_value, uint16_t null_value) { - if (parent) { - // parent node: inherit definition level from the parent - idx_t vector_index = 0; - while (state.definition_levels.size() < parent->definition_levels.size()) { - idx_t current_index = state.definition_levels.size(); - if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) { - state.definition_levels.push_back(parent->definition_levels[current_index]); - } else if (validity.RowIsValid(vector_index)) { - state.definition_levels.push_back(define_value); - } else { - if (!can_have_nulls) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); - } - null_count++; - state.definition_levels.push_back(null_value); - } - if (parent->is_empty.empty() || !parent->is_empty[current_index]) { - vector_index++; - } - } - } else { - // no parent: set definition levels only from this validity mask - for (idx_t i = 0; i < count; i++) { - if (validity.RowIsValid(i)) { - state.definition_levels.push_back(define_value); - } else { - if (!can_have_nulls) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); - } - null_count++; - state.definition_levels.push_back(null_value); - } - } - } -} - -class ColumnWriterPageState { -public: - virtual ~ColumnWriterPageState() { - } - -public: - template - TARGET &Cast() { - D_ASSERT(dynamic_cast(this)); - return reinterpret_cast(*this); - } - template - const TARGET &Cast() const { - D_ASSERT(dynamic_cast(this)); - return reinterpret_cast(*this); - } -}; - -struct PageInformation { - idx_t offset = 0; - idx_t row_count = 0; - idx_t empty_count = 0; - idx_t estimated_page_size = 0; -}; - -struct PageWriteInformation { - PageHeader page_header; - unique_ptr temp_writer; - unique_ptr page_state; - idx_t write_page_idx = 0; - idx_t write_count = 0; - idx_t max_write_count = 0; - size_t compressed_size; - data_ptr_t compressed_data; - unique_ptr compressed_buf; -}; - -class BasicColumnWriterState : public ColumnWriterState { -public: - BasicColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { - page_info.emplace_back(); - } - ~BasicColumnWriterState() override = default; - - duckdb_parquet::format::RowGroup &row_group; - idx_t col_idx; - vector page_info; - vector write_info; - unique_ptr stats_state; - idx_t current_page = 0; -}; - -//===--------------------------------------------------------------------===// -// BasicColumnWriter -// A base class for writing all non-compound types (ex. numerics, strings) -//===--------------------------------------------------------------------===// -class BasicColumnWriter : public ColumnWriter { -public: - BasicColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls) { - } - - ~BasicColumnWriter() override = default; - - //! We limit the uncompressed page size to 100MB - // The max size in Parquet is 2GB, but we choose a more conservative limit - static constexpr const idx_t MAX_UNCOMPRESSED_PAGE_SIZE = 100000000; - //! Dictionary pages must be below 2GB. Unlike data pages, there's only one dictionary page. - // For this reason we go with a much higher, but still a conservative upper bound of 1GB; - static constexpr const idx_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1e9; - - // the maximum size a key entry in an RLE page takes - static constexpr const idx_t MAX_DICTIONARY_KEY_SIZE = sizeof(uint32_t); - // the size of encoding the string length - static constexpr const idx_t STRING_LENGTH_SIZE = sizeof(uint32_t); - -public: - unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override; - void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - void BeginWrite(ColumnWriterState &state) override; - void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; - void FinalizeWrite(ColumnWriterState &state) override; - -protected: - void WriteLevels(Serializer &temp_writer, const vector &levels, idx_t max_value, idx_t start_offset, - idx_t count); - - virtual duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state); - - void NextPage(BasicColumnWriterState &state); - void FlushPage(BasicColumnWriterState &state); - - //! Initializes the state used to track statistics during writing. Only used for scalar types. - virtual unique_ptr InitializeStatsState(); - - //! Initialize the writer for a specific page. Only used for scalar types. - virtual unique_ptr InitializePageState(BasicColumnWriterState &state); - - //! Flushes the writer for a specific page. Only used for scalar types. - virtual void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state); - - //! Retrieves the row size of a vector at the specified location. Only used for scalar types. - virtual idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state); - //! Writes a (subset of a) vector to the specified serializer. Only used for scalar types. - virtual void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state, - Vector &vector, idx_t chunk_start, idx_t chunk_end) = 0; - - virtual bool HasDictionary(BasicColumnWriterState &state_p) { - return false; - } - //! The number of elements in the dictionary - virtual idx_t DictionarySize(BasicColumnWriterState &state_p); - void WriteDictionary(BasicColumnWriterState &state, unique_ptr temp_writer, idx_t row_count); - virtual void FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats); - - void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column); - void RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group); -}; - -unique_ptr BasicColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) { - auto result = make_uniq(row_group, row_group.columns.size()); - RegisterToRowGroup(row_group); - return std::move(result); -} - -void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group) { - format::ColumnChunk column_chunk; - column_chunk.__isset.meta_data = true; - column_chunk.meta_data.codec = writer.GetCodec(); - column_chunk.meta_data.path_in_schema = schema_path; - column_chunk.meta_data.num_values = 0; - column_chunk.meta_data.type = writer.GetType(schema_idx); - row_group.columns.push_back(std::move(column_chunk)); -} - -unique_ptr BasicColumnWriter::InitializePageState(BasicColumnWriterState &state) { - return nullptr; -} - -void BasicColumnWriter::FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state) { -} - -void BasicColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - auto &col_chunk = state.row_group.columns[state.col_idx]; - - idx_t start = 0; - idx_t vcount = parent ? parent->definition_levels.size() - state.definition_levels.size() : count; - idx_t parent_index = state.definition_levels.size(); - auto &validity = FlatVector::Validity(vector); - HandleRepeatLevels(state, parent, count, max_repeat); - HandleDefineLevels(state, parent, validity, count, max_define, max_define - 1); - - idx_t vector_index = 0; - for (idx_t i = start; i < vcount; i++) { - auto &page_info = state.page_info.back(); - page_info.row_count++; - col_chunk.meta_data.num_values++; - if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index + i]) { - page_info.empty_count++; - continue; - } - if (validity.RowIsValid(vector_index)) { - page_info.estimated_page_size += GetRowSize(vector, vector_index, state); - if (page_info.estimated_page_size >= MAX_UNCOMPRESSED_PAGE_SIZE) { - PageInformation new_info; - new_info.offset = page_info.offset + page_info.row_count; - state.page_info.push_back(new_info); - } - } - vector_index++; - } -} - -duckdb_parquet::format::Encoding::type BasicColumnWriter::GetEncoding(BasicColumnWriterState &state) { - return Encoding::PLAIN; -} - -void BasicColumnWriter::BeginWrite(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - - // set up the page write info - state.stats_state = InitializeStatsState(); - for (idx_t page_idx = 0; page_idx < state.page_info.size(); page_idx++) { - auto &page_info = state.page_info[page_idx]; - if (page_info.row_count == 0) { - D_ASSERT(page_idx + 1 == state.page_info.size()); - state.page_info.erase(state.page_info.begin() + page_idx); - break; - } - PageWriteInformation write_info; - // set up the header - auto &hdr = write_info.page_header; - hdr.compressed_page_size = 0; - hdr.uncompressed_page_size = 0; - hdr.type = PageType::DATA_PAGE; - hdr.__isset.data_page_header = true; - - hdr.data_page_header.num_values = page_info.row_count; - hdr.data_page_header.encoding = GetEncoding(state); - hdr.data_page_header.definition_level_encoding = Encoding::RLE; - hdr.data_page_header.repetition_level_encoding = Encoding::RLE; - - write_info.temp_writer = make_uniq(); - write_info.write_count = page_info.empty_count; - write_info.max_write_count = page_info.row_count; - write_info.page_state = InitializePageState(state); - - write_info.compressed_size = 0; - write_info.compressed_data = nullptr; - - state.write_info.push_back(std::move(write_info)); - } - - // start writing the first page - NextPage(state); -} - -void BasicColumnWriter::WriteLevels(Serializer &temp_writer, const vector &levels, idx_t max_value, - idx_t offset, idx_t count) { - if (levels.empty() || count == 0) { - return; - } - - // write the levels using the RLE-BP encoding - auto bit_width = RleBpDecoder::ComputeBitWidth((max_value)); - RleBpEncoder rle_encoder(bit_width); - - rle_encoder.BeginPrepare(levels[offset]); - for (idx_t i = offset + 1; i < offset + count; i++) { - rle_encoder.PrepareValue(levels[i]); - } - rle_encoder.FinishPrepare(); - - // start off by writing the byte count as a uint32_t - temp_writer.Write(rle_encoder.GetByteCount()); - rle_encoder.BeginWrite(temp_writer, levels[offset]); - for (idx_t i = offset + 1; i < offset + count; i++) { - rle_encoder.WriteValue(temp_writer, levels[i]); - } - rle_encoder.FinishWrite(temp_writer); -} - -void BasicColumnWriter::NextPage(BasicColumnWriterState &state) { - if (state.current_page > 0) { - // need to flush the current page - FlushPage(state); - } - if (state.current_page >= state.write_info.size()) { - state.current_page = state.write_info.size() + 1; - return; - } - auto &page_info = state.page_info[state.current_page]; - auto &write_info = state.write_info[state.current_page]; - state.current_page++; - - auto &temp_writer = *write_info.temp_writer; - - // write the repetition levels - WriteLevels(temp_writer, state.repetition_levels, max_repeat, page_info.offset, page_info.row_count); - - // write the definition levels - WriteLevels(temp_writer, state.definition_levels, max_define, page_info.offset, page_info.row_count); -} - -void BasicColumnWriter::FlushPage(BasicColumnWriterState &state) { - D_ASSERT(state.current_page > 0); - if (state.current_page > state.write_info.size()) { - return; - } - - // compress the page info - auto &write_info = state.write_info[state.current_page - 1]; - auto &temp_writer = *write_info.temp_writer; - auto &hdr = write_info.page_header; - - FlushPageState(temp_writer, write_info.page_state.get()); - - // now that we have finished writing the data we know the uncompressed size - if (temp_writer.blob.size > idx_t(NumericLimits::Maximum())) { - throw InternalException("Parquet writer: %d uncompressed page size out of range for type integer", - temp_writer.blob.size); - } - hdr.uncompressed_page_size = temp_writer.blob.size; - - // compress the data - CompressPage(temp_writer, write_info.compressed_size, write_info.compressed_data, write_info.compressed_buf); - hdr.compressed_page_size = write_info.compressed_size; - D_ASSERT(hdr.uncompressed_page_size > 0); - D_ASSERT(hdr.compressed_page_size > 0); - - if (write_info.compressed_buf) { - // if the data has been compressed, we no longer need the compressed data - D_ASSERT(write_info.compressed_buf.get() == write_info.compressed_data); - write_info.temp_writer.reset(); - } -} - -unique_ptr BasicColumnWriter::InitializeStatsState() { - return make_uniq(); -} - -idx_t BasicColumnWriter::GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) { - throw InternalException("GetRowSize unsupported for struct/list column writers"); -} - -void BasicColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - - idx_t remaining = count; - idx_t offset = 0; - while (remaining > 0) { - auto &write_info = state.write_info[state.current_page - 1]; - if (!write_info.temp_writer) { - throw InternalException("Writes are not correctly aligned!?"); - } - auto &temp_writer = *write_info.temp_writer; - idx_t write_count = MinValue(remaining, write_info.max_write_count - write_info.write_count); - D_ASSERT(write_count > 0); - - WriteVector(temp_writer, state.stats_state.get(), write_info.page_state.get(), vector, offset, - offset + write_count); - - write_info.write_count += write_count; - if (write_info.write_count == write_info.max_write_count) { - NextPage(state); - } - offset += write_count; - remaining -= write_count; - } -} - -void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state, - duckdb_parquet::format::ColumnChunk &column_chunk) { - if (max_repeat == 0) { - column_chunk.meta_data.statistics.null_count = null_count; - column_chunk.meta_data.statistics.__isset.null_count = true; - column_chunk.meta_data.__isset.statistics = true; - } - // set min/max/min_value/max_value - // this code is not going to win any beauty contests, but well - auto min = state.stats_state->GetMin(); - if (!min.empty()) { - column_chunk.meta_data.statistics.min = std::move(min); - column_chunk.meta_data.statistics.__isset.min = true; - column_chunk.meta_data.__isset.statistics = true; - } - auto max = state.stats_state->GetMax(); - if (!max.empty()) { - column_chunk.meta_data.statistics.max = std::move(max); - column_chunk.meta_data.statistics.__isset.max = true; - column_chunk.meta_data.__isset.statistics = true; - } - auto min_value = state.stats_state->GetMinValue(); - if (!min_value.empty()) { - column_chunk.meta_data.statistics.min_value = std::move(min_value); - column_chunk.meta_data.statistics.__isset.min_value = true; - column_chunk.meta_data.__isset.statistics = true; - } - auto max_value = state.stats_state->GetMaxValue(); - if (!max_value.empty()) { - column_chunk.meta_data.statistics.max_value = std::move(max_value); - column_chunk.meta_data.statistics.__isset.max_value = true; - column_chunk.meta_data.__isset.statistics = true; - } - for (const auto &write_info : state.write_info) { - column_chunk.meta_data.encodings.push_back(write_info.page_header.data_page_header.encoding); - } -} - -void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - auto &column_chunk = state.row_group.columns[state.col_idx]; - - // flush the last page (if any remains) - FlushPage(state); - - auto &column_writer = writer.GetWriter(); - auto start_offset = column_writer.GetTotalWritten(); - auto page_offset = start_offset; - // flush the dictionary - if (HasDictionary(state)) { - column_chunk.meta_data.statistics.distinct_count = DictionarySize(state); - column_chunk.meta_data.statistics.__isset.distinct_count = true; - column_chunk.meta_data.dictionary_page_offset = page_offset; - column_chunk.meta_data.__isset.dictionary_page_offset = true; - FlushDictionary(state, state.stats_state.get()); - page_offset += state.write_info[0].compressed_size; - } - - // record the start position of the pages for this column - column_chunk.meta_data.data_page_offset = page_offset; - SetParquetStatistics(state, column_chunk); - - // write the individual pages to disk - idx_t total_uncompressed_size = 0; - for (auto &write_info : state.write_info) { - D_ASSERT(write_info.page_header.uncompressed_page_size > 0); - auto header_start_offset = column_writer.GetTotalWritten(); - write_info.page_header.write(writer.GetProtocol()); - // total uncompressed size in the column chunk includes the header size (!) - total_uncompressed_size += column_writer.GetTotalWritten() - header_start_offset; - total_uncompressed_size += write_info.page_header.uncompressed_page_size; - column_writer.WriteData(write_info.compressed_data, write_info.compressed_size); - } - column_chunk.meta_data.total_compressed_size = column_writer.GetTotalWritten() - start_offset; - column_chunk.meta_data.total_uncompressed_size = total_uncompressed_size; -} - -void BasicColumnWriter::FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats) { - throw InternalException("This page does not have a dictionary"); -} - -idx_t BasicColumnWriter::DictionarySize(BasicColumnWriterState &state) { - throw InternalException("This page does not have a dictionary"); -} - -void BasicColumnWriter::WriteDictionary(BasicColumnWriterState &state, unique_ptr temp_writer, - idx_t row_count) { - D_ASSERT(temp_writer); - D_ASSERT(temp_writer->blob.size > 0); - - // write the dictionary page header - PageWriteInformation write_info; - // set up the header - auto &hdr = write_info.page_header; - hdr.uncompressed_page_size = temp_writer->blob.size; - hdr.type = PageType::DICTIONARY_PAGE; - hdr.__isset.dictionary_page_header = true; - - hdr.dictionary_page_header.encoding = Encoding::PLAIN; - hdr.dictionary_page_header.is_sorted = false; - hdr.dictionary_page_header.num_values = row_count; - - write_info.temp_writer = std::move(temp_writer); - write_info.write_count = 0; - write_info.max_write_count = 0; - - // compress the contents of the dictionary page - CompressPage(*write_info.temp_writer, write_info.compressed_size, write_info.compressed_data, - write_info.compressed_buf); - hdr.compressed_page_size = write_info.compressed_size; - - // insert the dictionary page as the first page to write for this column - state.write_info.insert(state.write_info.begin(), std::move(write_info)); -} - -//===--------------------------------------------------------------------===// -// Standard Column Writer -//===--------------------------------------------------------------------===// -template -class NumericStatisticsState : public ColumnWriterStatistics { -public: - NumericStatisticsState() : min(NumericLimits::Maximum()), max(NumericLimits::Minimum()) { - } - - T min; - T max; - -public: - bool HasStats() { - return min <= max; - } - - string GetMin() override { - return NumericLimits::IsSigned() ? GetMinValue() : string(); - } - string GetMax() override { - return NumericLimits::IsSigned() ? GetMaxValue() : string(); - } - string GetMinValue() override { - return HasStats() ? string((char *)&min, sizeof(T)) : string(); - } - string GetMaxValue() override { - return HasStats() ? string((char *)&max, sizeof(T)) : string(); - } -}; - -struct BaseParquetOperator { - template - static unique_ptr InitializeStats() { - return make_uniq>(); - } - - template - static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) { - auto &numeric_stats = (NumericStatisticsState &)*stats; - if (LessThan::Operation(target_value, numeric_stats.min)) { - numeric_stats.min = target_value; - } - if (GreaterThan::Operation(target_value, numeric_stats.max)) { - numeric_stats.max = target_value; - } - } -}; - -struct ParquetCastOperator : public BaseParquetOperator { - template - static TGT Operation(SRC input) { - return TGT(input); - } -}; - -struct ParquetTimestampNSOperator : public BaseParquetOperator { - template - static TGT Operation(SRC input) { - return Timestamp::FromEpochNanoSeconds(input).value; - } -}; - -struct ParquetTimestampSOperator : public BaseParquetOperator { - template - static TGT Operation(SRC input) { - return Timestamp::FromEpochSeconds(input).value; - } -}; - -struct ParquetHugeintOperator { - template - static TGT Operation(SRC input) { - return Hugeint::Cast(input); - } - - template - static unique_ptr InitializeStats() { - return make_uniq(); - } - - template - static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) { - } -}; - -template -static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, idx_t chunk_start, idx_t chunk_end, - ValidityMask &mask, Serializer &ser) { - auto *ptr = FlatVector::GetData(col); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - TGT target_value = OP::template Operation(ptr[r]); - OP::template HandleStats(stats, ptr[r], target_value); - ser.Write(target_value); - } - } -} - -template -class StandardColumnWriter : public BasicColumnWriter { -public: - StandardColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, // NOLINT - idx_t max_repeat, idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~StandardColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return OP::template InitializeStats(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - TemplatedWritePlain(input_column, stats, chunk_start, chunk_end, mask, temp_writer); - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) override { - return sizeof(TGT); - } -}; - -//===--------------------------------------------------------------------===// -// Boolean Column Writer -//===--------------------------------------------------------------------===// -class BooleanStatisticsState : public ColumnWriterStatistics { -public: - BooleanStatisticsState() : min(true), max(false) { - } - - bool min; - bool max; - -public: - bool HasStats() { - return !(min && !max); - } - - string GetMin() override { - return GetMinValue(); - } - string GetMax() override { - return GetMaxValue(); - } - string GetMinValue() override { - return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string(); - } - string GetMaxValue() override { - return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string(); - } -}; - -class BooleanWriterPageState : public ColumnWriterPageState { -public: - uint8_t byte = 0; - uint8_t byte_pos = 0; -}; - -class BooleanColumnWriter : public BasicColumnWriter { -public: - BooleanColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~BooleanColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return make_uniq(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *state_p, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &stats = stats_p->Cast(); - auto &state = state_p->Cast(); - auto &mask = FlatVector::Validity(input_column); - - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - // only encode if non-null - if (ptr[r]) { - stats.max = true; - state.byte |= 1 << state.byte_pos; - } else { - stats.min = false; - } - state.byte_pos++; - - if (state.byte_pos == 8) { - temp_writer.Write(state.byte); - state.byte = 0; - state.byte_pos = 0; - } - } - } - } - - unique_ptr InitializePageState(BasicColumnWriterState &state) override { - return make_uniq(); - } - - void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state_p) override { - auto &state = state_p->Cast(); - if (state.byte_pos > 0) { - temp_writer.Write(state.byte); - state.byte = 0; - state.byte_pos = 0; - } - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) override { - return sizeof(bool); - } -}; - -//===--------------------------------------------------------------------===// -// Decimal Column Writer -//===--------------------------------------------------------------------===// -static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) { - bool positive = input >= 0; - // numbers are stored as two's complement so some muckery is required - if (!positive) { - input = NumericLimits::Maximum() + input + 1; - } - uint64_t high_bytes = uint64_t(input.upper); - uint64_t low_bytes = input.lower; - - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[i] = (high_bytes >> shift_count) & 0xFF; - } - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF; - } - if (!positive) { - result[0] |= 0x80; - } -} - -class FixedDecimalStatistics : public ColumnWriterStatistics { -public: - FixedDecimalStatistics() : min(NumericLimits::Maximum()), max(NumericLimits::Minimum()) { - } - - hugeint_t min; - hugeint_t max; - -public: - string GetStats(hugeint_t &input) { - data_t buffer[16]; - WriteParquetDecimal(input, buffer); - return string(const_char_ptr_cast(buffer), 16); - } - - bool HasStats() { - return min <= max; - } - - void Update(hugeint_t &val) { - if (LessThan::Operation(val, min)) { - min = val; - } - if (GreaterThan::Operation(val, max)) { - max = val; - } - } - - string GetMin() override { - return GetMinValue(); - } - string GetMax() override { - return GetMaxValue(); - } - string GetMinValue() override { - return HasStats() ? GetStats(min) : string(); - } - string GetMaxValue() override { - return HasStats() ? GetStats(max) : string(); - } -}; - -class FixedDecimalColumnWriter : public BasicColumnWriter { -public: - FixedDecimalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~FixedDecimalColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return make_uniq(); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - auto &stats = stats_p->Cast(); - - data_t temp_buffer[16]; - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - stats.Update(ptr[r]); - WriteParquetDecimal(ptr[r], temp_buffer); - temp_writer.WriteData(temp_buffer, 16); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) override { - return sizeof(hugeint_t); - } -}; - -//===--------------------------------------------------------------------===// -// UUID Column Writer -//===--------------------------------------------------------------------===// -class UUIDColumnWriter : public BasicColumnWriter { - static constexpr const idx_t PARQUET_UUID_SIZE = 16; - -public: - UUIDColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~UUIDColumnWriter() override = default; - -public: - static void WriteParquetUUID(hugeint_t input, data_ptr_t result) { - uint64_t high_bytes = input.upper ^ (int64_t(1) << 63); - uint64_t low_bytes = input.lower; - - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[i] = (high_bytes >> shift_count) & 0xFF; - } - for (idx_t i = 0; i < sizeof(uint64_t); i++) { - auto shift_count = (sizeof(uint64_t) - i - 1) * 8; - result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF; - } - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - - data_t temp_buffer[PARQUET_UUID_SIZE]; - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - WriteParquetUUID(ptr[r], temp_buffer); - temp_writer.WriteData(temp_buffer, PARQUET_UUID_SIZE); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) override { - return PARQUET_UUID_SIZE; - } -}; - -//===--------------------------------------------------------------------===// -// Interval Column Writer -//===--------------------------------------------------------------------===// -class IntervalColumnWriter : public BasicColumnWriter { - static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12; - -public: - IntervalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~IntervalColumnWriter() override = default; - -public: - static void WriteParquetInterval(interval_t input, data_ptr_t result) { - if (input.days < 0 || input.months < 0 || input.micros < 0) { - throw IOException("Parquet files do not support negative intervals"); - } - Store(input.months, result); - Store(input.days, result + sizeof(uint32_t)); - Store(input.micros / 1000, result + sizeof(uint32_t) * 2); - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - - data_t temp_buffer[PARQUET_INTERVAL_SIZE]; - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - WriteParquetInterval(ptr[r], temp_buffer); - temp_writer.WriteData(temp_buffer, PARQUET_INTERVAL_SIZE); - } - } - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) override { - return PARQUET_INTERVAL_SIZE; - } -}; - -//===--------------------------------------------------------------------===// -// String Column Writer -//===--------------------------------------------------------------------===// -class StringStatisticsState : public ColumnWriterStatistics { - static constexpr const idx_t MAX_STRING_STATISTICS_SIZE = 10000; - -public: - StringStatisticsState() : has_stats(false), values_too_big(false), min(), max() { - } - - bool has_stats; - bool values_too_big; - string min; - string max; - -public: - bool HasStats() { - return has_stats; - } - - void Update(const string_t &val) { - if (values_too_big) { - return; - } - auto str_len = val.GetSize(); - if (str_len > MAX_STRING_STATISTICS_SIZE) { - // we avoid gathering stats when individual string values are too large - // this is because the statistics are copied into the Parquet file meta data in uncompressed format - // ideally we avoid placing several mega or giga-byte long strings there - // we put a threshold of 10KB, if we see strings that exceed this threshold we avoid gathering stats - values_too_big = true; - min = string(); - max = string(); - return; - } - if (!has_stats || LessThan::Operation(val, string_t(min))) { - min = val.GetString(); - } - if (!has_stats || GreaterThan::Operation(val, string_t(max))) { - max = val.GetString(); - } - has_stats = true; - } - - string GetMin() override { - return GetMinValue(); - } - string GetMax() override { - return GetMaxValue(); - } - string GetMinValue() override { - return HasStats() ? min : string(); - } - string GetMaxValue() override { - return HasStats() ? max : string(); - } -}; - -class StringColumnWriterState : public BasicColumnWriterState { -public: - StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : BasicColumnWriterState(row_group, col_idx) { - } - ~StringColumnWriterState() override = default; - - // analysis state - idx_t estimated_dict_page_size = 0; - idx_t estimated_rle_pages_size = 0; - idx_t estimated_plain_size = 0; - - // Dictionary and accompanying string heap - string_map_t dictionary; - // key_bit_width== 0 signifies the chunk is written in plain encoding - uint32_t key_bit_width; - - bool IsDictionaryEncoded() { - return key_bit_width != 0; - } -}; - -class StringWriterPageState : public ColumnWriterPageState { -public: - explicit StringWriterPageState(uint32_t bit_width, const string_map_t &values) - : bit_width(bit_width), dictionary(values), encoder(bit_width), written_value(false) { - D_ASSERT(IsDictionaryEncoded() || (bit_width == 0 && dictionary.empty())); - } - - bool IsDictionaryEncoded() { - return bit_width != 0; - } - // if 0, we're writing a plain page - uint32_t bit_width; - const string_map_t &dictionary; - RleBpEncoder encoder; - bool written_value; -}; - -class StringColumnWriter : public BasicColumnWriter { -public: - StringColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) { - } - ~StringColumnWriter() override = default; - -public: - unique_ptr InitializeStatsState() override { - return make_uniq(); - } - - unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override { - auto result = make_uniq(row_group, row_group.columns.size()); - RegisterToRowGroup(row_group); - return std::move(result); - } - - bool HasAnalyze() override { - return true; - } - - void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override { - auto &state = state_p.Cast(); - - idx_t vcount = parent ? parent->definition_levels.size() - state.definition_levels.size() : count; - idx_t parent_index = state.definition_levels.size(); - auto &validity = FlatVector::Validity(vector); - idx_t vector_index = 0; - uint32_t new_value_index = state.dictionary.size(); - uint32_t last_value_index = -1; - idx_t run_length = 0; - idx_t run_count = 0; - auto strings = FlatVector::GetData(vector); - for (idx_t i = 0; i < vcount; i++) { - - if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index + i]) { - continue; - } - - if (validity.RowIsValid(vector_index)) { - run_length++; - const auto &value = strings[vector_index]; - // Try to insert into the dictionary. If it's already there, we get back the value index - auto found = state.dictionary.insert(string_map_t::value_type(value, new_value_index)); - state.estimated_plain_size += value.GetSize() + STRING_LENGTH_SIZE; - if (found.second) { - // string didn't exist yet in the dictionary - new_value_index++; - state.estimated_dict_page_size += value.GetSize() + MAX_DICTIONARY_KEY_SIZE; - } - // if the value changed, we will encode it in the page - if (last_value_index != found.first->second) { - // we will add the value index size later, when we know the total number of keys - state.estimated_rle_pages_size += GetVarintSize(run_length); - run_length = 0; - run_count++; - last_value_index = found.first->second; - } - } - vector_index++; - } - // Add the costs of keys sizes. We don't know yet how many bytes the keys need as we haven't - // seen all the values. therefore we use an over-estimation of - state.estimated_rle_pages_size += MAX_DICTIONARY_KEY_SIZE * run_count; - } - - void FinalizeAnalyze(ColumnWriterState &state_p) override { - auto &state = state_p.Cast(); - - // check if a dictionary will require more space than a plain write, or if the dictionary page is going to - // be too large - if (state.estimated_dict_page_size > MAX_UNCOMPRESSED_DICT_PAGE_SIZE || - state.estimated_rle_pages_size + state.estimated_dict_page_size > state.estimated_plain_size) { - // clearing the dictionary signals a plain write - state.dictionary.clear(); - state.key_bit_width = 0; - } else { - state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size()); - } - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state_p, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &page_state = page_state_p->Cast(); - auto &mask = FlatVector::Validity(input_column); - auto &stats = stats_p->Cast(); - - auto *ptr = FlatVector::GetData(input_column); - if (page_state.IsDictionaryEncoded()) { - // dictionary based page - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { - continue; - } - auto value_index = page_state.dictionary.at(ptr[r]); - if (!page_state.written_value) { - // first value - // write the bit-width as a one-byte entry - temp_writer.Write(page_state.bit_width); - // now begin writing the actual value - page_state.encoder.BeginWrite(temp_writer, value_index); - page_state.written_value = true; - } else { - page_state.encoder.WriteValue(temp_writer, value_index); - } - } - } else { - // plain page - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (!mask.RowIsValid(r)) { - continue; - } - stats.Update(ptr[r]); - temp_writer.Write(ptr[r].GetSize()); - temp_writer.WriteData(const_data_ptr_cast(ptr[r].GetData()), ptr[r].GetSize()); - } - } - } - - unique_ptr InitializePageState(BasicColumnWriterState &state_p) override { - auto &state = state_p.Cast(); - return make_uniq(state.key_bit_width, state.dictionary); - } - - void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state_p) override { - auto &page_state = state_p->Cast(); - if (page_state.bit_width != 0) { - if (!page_state.written_value) { - // all values are null - // just write the bit width - temp_writer.Write(page_state.bit_width); - return; - } - page_state.encoder.FinishWrite(temp_writer); - } - } - - duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state_p) override { - auto &state = state_p.Cast(); - return state.IsDictionaryEncoded() ? Encoding::RLE_DICTIONARY : Encoding::PLAIN; - } - - bool HasDictionary(BasicColumnWriterState &state_p) override { - auto &state = state_p.Cast(); - return state.IsDictionaryEncoded(); - } - - idx_t DictionarySize(BasicColumnWriterState &state_p) override { - auto &state = state_p.Cast(); - D_ASSERT(state.IsDictionaryEncoded()); - return state.dictionary.size(); - } - - void FlushDictionary(BasicColumnWriterState &state_p, ColumnWriterStatistics *stats_p) override { - auto &stats = stats_p->Cast(); - auto &state = state_p.Cast(); - if (!state.IsDictionaryEncoded()) { - return; - } - // first we need to sort the values in index order - auto values = vector(state.dictionary.size()); - for (const auto &entry : state.dictionary) { - D_ASSERT(values[entry.second].GetSize() == 0); - values[entry.second] = entry.first; - } - // first write the contents of the dictionary page to a temporary buffer - auto temp_writer = make_uniq(); - for (idx_t r = 0; r < values.size(); r++) { - auto &value = values[r]; - // update the statistics - stats.Update(value); - // write this string value to the dictionary - temp_writer->Write(value.GetSize()); - temp_writer->WriteData(const_data_ptr_cast((value.GetData())), value.GetSize()); - } - // flush the dictionary page and add it to the to-be-written pages - WriteDictionary(state, std::move(temp_writer), values.size()); - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state_p) override { - auto &state = state_p.Cast(); - if (state.IsDictionaryEncoded()) { - return (state.key_bit_width + 7) / 8; - } else { - auto strings = FlatVector::GetData(vector); - return strings[index].GetSize(); - } - } -}; - -//===--------------------------------------------------------------------===// -// Enum Column Writer -//===--------------------------------------------------------------------===// -class EnumWriterPageState : public ColumnWriterPageState { -public: - explicit EnumWriterPageState(uint32_t bit_width) : encoder(bit_width), written_value(false) { - } - - RleBpEncoder encoder; - bool written_value; -}; - -class EnumColumnWriter : public BasicColumnWriter { -public: - EnumColumnWriter(ParquetWriter &writer, LogicalType enum_type_p, idx_t schema_idx, vector schema_path_p, - idx_t max_repeat, idx_t max_define, bool can_have_nulls) - : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls), - enum_type(std::move(enum_type_p)) { - bit_width = RleBpDecoder::ComputeBitWidth(EnumType::GetSize(enum_type)); - } - ~EnumColumnWriter() override = default; - - LogicalType enum_type; - uint32_t bit_width; - -public: - unique_ptr InitializeStatsState() override { - return make_uniq(); - } - - template - void WriteEnumInternal(Serializer &temp_writer, Vector &input_column, idx_t chunk_start, idx_t chunk_end, - EnumWriterPageState &page_state) { - auto &mask = FlatVector::Validity(input_column); - auto *ptr = FlatVector::GetData(input_column); - for (idx_t r = chunk_start; r < chunk_end; r++) { - if (mask.RowIsValid(r)) { - if (!page_state.written_value) { - // first value - // write the bit-width as a one-byte entry - temp_writer.Write(bit_width); - // now begin writing the actual value - page_state.encoder.BeginWrite(temp_writer, ptr[r]); - page_state.written_value = true; - } else { - page_state.encoder.WriteValue(temp_writer, ptr[r]); - } - } - } - } - - void WriteVector(Serializer &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state_p, - Vector &input_column, idx_t chunk_start, idx_t chunk_end) override { - auto &page_state = page_state_p->Cast(); - switch (enum_type.InternalType()) { - case PhysicalType::UINT8: - WriteEnumInternal(temp_writer, input_column, chunk_start, chunk_end, page_state); - break; - case PhysicalType::UINT16: - WriteEnumInternal(temp_writer, input_column, chunk_start, chunk_end, page_state); - break; - case PhysicalType::UINT32: - WriteEnumInternal(temp_writer, input_column, chunk_start, chunk_end, page_state); - break; - default: - throw InternalException("Unsupported internal enum type"); - } - } - - unique_ptr InitializePageState(BasicColumnWriterState &state) override { - return make_uniq(bit_width); - } - - void FlushPageState(Serializer &temp_writer, ColumnWriterPageState *state_p) override { - auto &page_state = state_p->Cast(); - if (!page_state.written_value) { - // all values are null - // just write the bit width - temp_writer.Write(bit_width); - return; - } - page_state.encoder.FinishWrite(temp_writer); - } - - duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state) override { - return Encoding::RLE_DICTIONARY; - } - - bool HasDictionary(BasicColumnWriterState &state) override { - return true; - } - - idx_t DictionarySize(BasicColumnWriterState &state_p) override { - return EnumType::GetSize(enum_type); - } - - void FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats_p) override { - auto &stats = stats_p->Cast(); - // write the enum values to a dictionary page - auto &enum_values = EnumType::GetValuesInsertOrder(enum_type); - auto enum_count = EnumType::GetSize(enum_type); - auto string_values = FlatVector::GetData(enum_values); - // first write the contents of the dictionary page to a temporary buffer - auto temp_writer = make_uniq(); - for (idx_t r = 0; r < enum_count; r++) { - D_ASSERT(!FlatVector::IsNull(enum_values, r)); - // update the statistics - stats.Update(string_values[r]); - // write this string value to the dictionary - temp_writer->Write(string_values[r].GetSize()); - temp_writer->WriteData(const_data_ptr_cast(string_values[r].GetData()), string_values[r].GetSize()); - } - // flush the dictionary page and add it to the to-be-written pages - WriteDictionary(state, std::move(temp_writer), enum_count); - } - - idx_t GetRowSize(Vector &vector, idx_t index, BasicColumnWriterState &state) override { - return (bit_width + 7) / 8; - } -}; - -//===--------------------------------------------------------------------===// -// Struct Column Writer -//===--------------------------------------------------------------------===// -class StructColumnWriter : public ColumnWriter { -public: - StructColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, vector> child_writers_p, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls), - child_writers(std::move(child_writers_p)) { - } - ~StructColumnWriter() override = default; - - vector> child_writers; - -public: - unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override; - bool HasAnalyze() override; - void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - void FinalizeAnalyze(ColumnWriterState &state) override; - void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - - void BeginWrite(ColumnWriterState &state) override; - void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; - void FinalizeWrite(ColumnWriterState &state) override; -}; - -class StructColumnWriterState : public ColumnWriterState { -public: - StructColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { - } - ~StructColumnWriterState() override = default; - - duckdb_parquet::format::RowGroup &row_group; - idx_t col_idx; - vector> child_states; -}; - -unique_ptr StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) { - auto result = make_uniq(row_group, row_group.columns.size()); - - result->child_states.reserve(child_writers.size()); - for (auto &child_writer : child_writers) { - result->child_states.push_back(child_writer->InitializeWriteState(row_group)); - } - return std::move(result); -} - -bool StructColumnWriter::HasAnalyze() { - for (auto &child_writer : child_writers) { - if (child_writer->HasAnalyze()) { - return true; - } - } - return false; -} - -void StructColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - auto &child_vectors = StructVector::GetEntries(vector); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - // Need to check again. It might be that just one child needs it but the rest not - if (child_writers[child_idx]->HasAnalyze()) { - child_writers[child_idx]->Analyze(*state.child_states[child_idx], &state_p, *child_vectors[child_idx], - count); - } - } -} - -void StructColumnWriter::FinalizeAnalyze(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - // Need to check again. It might be that just one child needs it but the rest not - if (child_writers[child_idx]->HasAnalyze()) { - child_writers[child_idx]->FinalizeAnalyze(*state.child_states[child_idx]); - } - } -} - -void StructColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - - auto &validity = FlatVector::Validity(vector); - if (parent) { - // propagate empty entries from the parent - while (state.is_empty.size() < parent->is_empty.size()) { - state.is_empty.push_back(parent->is_empty[state.is_empty.size()]); - } - } - HandleRepeatLevels(state_p, parent, count, max_repeat); - HandleDefineLevels(state_p, parent, validity, count, PARQUET_DEFINE_VALID, max_define - 1); - auto &child_vectors = StructVector::GetEntries(vector); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - child_writers[child_idx]->Prepare(*state.child_states[child_idx], &state_p, *child_vectors[child_idx], count); - } -} - -void StructColumnWriter::BeginWrite(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - child_writers[child_idx]->BeginWrite(*state.child_states[child_idx]); - } -} - -void StructColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - auto &child_vectors = StructVector::GetEntries(vector); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - child_writers[child_idx]->Write(*state.child_states[child_idx], *child_vectors[child_idx], count); - } -} - -void StructColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - for (idx_t child_idx = 0; child_idx < child_writers.size(); child_idx++) { - // we add the null count of the struct to the null count of the children - child_writers[child_idx]->null_count += null_count; - child_writers[child_idx]->FinalizeWrite(*state.child_states[child_idx]); - } -} - -//===--------------------------------------------------------------------===// -// List Column Writer -//===--------------------------------------------------------------------===// -class ListColumnWriter : public ColumnWriter { -public: - ListColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path_p, idx_t max_repeat, - idx_t max_define, unique_ptr child_writer_p, bool can_have_nulls) - : ColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls), - child_writer(std::move(child_writer_p)) { - } - ~ListColumnWriter() override = default; - - unique_ptr child_writer; - -public: - unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override; - bool HasAnalyze() override; - void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - void FinalizeAnalyze(ColumnWriterState &state) override; - void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override; - - void BeginWrite(ColumnWriterState &state) override; - void Write(ColumnWriterState &state, Vector &vector, idx_t count) override; - void FinalizeWrite(ColumnWriterState &state) override; -}; - -class ListColumnWriterState : public ColumnWriterState { -public: - ListColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx) - : row_group(row_group), col_idx(col_idx) { - } - ~ListColumnWriterState() override = default; - - duckdb_parquet::format::RowGroup &row_group; - idx_t col_idx; - unique_ptr child_state; - idx_t parent_index = 0; -}; - -unique_ptr ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) { - auto result = make_uniq(row_group, row_group.columns.size()); - result->child_state = child_writer->InitializeWriteState(row_group); - return std::move(result); -} - -bool ListColumnWriter::HasAnalyze() { - return child_writer->HasAnalyze(); -} -void ListColumnWriter::Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - auto &list_child = ListVector::GetEntry(vector); - auto list_count = ListVector::GetListSize(vector); - child_writer->Analyze(*state.child_state, &state_p, list_child, list_count); -} - -void ListColumnWriter::FinalizeAnalyze(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - child_writer->FinalizeAnalyze(*state.child_state); -} - -void ListColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - - auto list_data = FlatVector::GetData(vector); - auto &validity = FlatVector::Validity(vector); - - // write definition levels and repeats - idx_t start = 0; - idx_t vcount = parent ? parent->definition_levels.size() - state.parent_index : count; - idx_t vector_index = 0; - for (idx_t i = start; i < vcount; i++) { - idx_t parent_index = state.parent_index + i; - if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index]) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(parent->repetition_levels[parent_index]); - state.is_empty.push_back(true); - continue; - } - auto first_repeat_level = - parent && !parent->repetition_levels.empty() ? parent->repetition_levels[parent_index] : max_repeat; - if (parent && parent->definition_levels[parent_index] != PARQUET_DEFINE_VALID) { - state.definition_levels.push_back(parent->definition_levels[parent_index]); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(true); - } else if (validity.RowIsValid(vector_index)) { - // push the repetition levels - if (list_data[vector_index].length == 0) { - state.definition_levels.push_back(max_define); - state.is_empty.push_back(true); - } else { - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - } - state.repetition_levels.push_back(first_repeat_level); - for (idx_t k = 1; k < list_data[vector_index].length; k++) { - state.repetition_levels.push_back(max_repeat + 1); - state.definition_levels.push_back(PARQUET_DEFINE_VALID); - state.is_empty.push_back(false); - } - } else { - if (!can_have_nulls) { - throw IOException("Parquet writer: map key column is not allowed to contain NULL values"); - } - state.definition_levels.push_back(max_define - 1); - state.repetition_levels.push_back(first_repeat_level); - state.is_empty.push_back(true); - } - vector_index++; - } - state.parent_index += vcount; - - auto &list_child = ListVector::GetEntry(vector); - Vector child_list(list_child); - auto child_length = ListVector::GetConsecutiveChildList(vector, child_list, 0, count); - child_writer->Prepare(*state.child_state, &state_p, child_list, child_length); -} - -void ListColumnWriter::BeginWrite(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - child_writer->BeginWrite(*state.child_state); -} - -void ListColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t count) { - auto &state = state_p.Cast(); - - auto &list_child = ListVector::GetEntry(vector); - Vector child_list(list_child); - auto child_length = ListVector::GetConsecutiveChildList(vector, child_list, 0, count); - child_writer->Write(*state.child_state, child_list, child_length); -} - -void ListColumnWriter::FinalizeWrite(ColumnWriterState &state_p) { - auto &state = state_p.Cast(); - child_writer->FinalizeWrite(*state.child_state); -} - -//===--------------------------------------------------------------------===// -// Create Column Writer -//===--------------------------------------------------------------------===// -unique_ptr ColumnWriter::CreateWriterRecursive(vector &schemas, - ParquetWriter &writer, const LogicalType &type, - const string &name, vector schema_path, - optional_ptr field_ids, - idx_t max_repeat, idx_t max_define, bool can_have_nulls) { - auto null_type = can_have_nulls ? FieldRepetitionType::OPTIONAL : FieldRepetitionType::REQUIRED; - if (!can_have_nulls) { - max_define--; - } - idx_t schema_idx = schemas.size(); - - optional_ptr field_id; - optional_ptr child_field_ids; - if (field_ids) { - auto field_id_it = field_ids->ids->find(name); - if (field_id_it != field_ids->ids->end()) { - field_id = &field_id_it->second; - child_field_ids = &field_id->child_field_ids; - } - } - - if (type.id() == LogicalTypeId::STRUCT) { - auto &child_types = StructType::GetChildTypes(type); - // set up the schema element for this struct - duckdb_parquet::format::SchemaElement schema_element; - schema_element.repetition_type = null_type; - schema_element.num_children = child_types.size(); - schema_element.__isset.num_children = true; - schema_element.__isset.type = false; - schema_element.__isset.repetition_type = true; - schema_element.name = name; - if (field_id && field_id->set) { - schema_element.__isset.field_id = true; - schema_element.field_id = field_id->field_id; - } - schemas.push_back(std::move(schema_element)); - schema_path.push_back(name); - - // construct the child types recursively - vector> child_writers; - child_writers.reserve(child_types.size()); - for (auto &child_type : child_types) { - child_writers.push_back(CreateWriterRecursive(schemas, writer, child_type.second, child_type.first, - schema_path, child_field_ids, max_repeat, max_define + 1)); - } - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - std::move(child_writers), can_have_nulls); - } - if (type.id() == LogicalTypeId::LIST) { - auto &child_type = ListType::GetChildType(type); - // set up the two schema elements for the list - // for some reason we only set the converted type in the OPTIONAL element - // first an OPTIONAL element - duckdb_parquet::format::SchemaElement optional_element; - optional_element.repetition_type = null_type; - optional_element.num_children = 1; - optional_element.converted_type = ConvertedType::LIST; - optional_element.__isset.num_children = true; - optional_element.__isset.type = false; - optional_element.__isset.repetition_type = true; - optional_element.__isset.converted_type = true; - optional_element.name = name; - if (field_id && field_id->set) { - optional_element.__isset.field_id = true; - optional_element.field_id = field_id->field_id; - } - schemas.push_back(std::move(optional_element)); - schema_path.push_back(name); - - // then a REPEATED element - duckdb_parquet::format::SchemaElement repeated_element; - repeated_element.repetition_type = FieldRepetitionType::REPEATED; - repeated_element.num_children = 1; - repeated_element.__isset.num_children = true; - repeated_element.__isset.type = false; - repeated_element.__isset.repetition_type = true; - repeated_element.name = "list"; - schemas.push_back(std::move(repeated_element)); - schema_path.emplace_back("list"); - - auto child_writer = CreateWriterRecursive(schemas, writer, child_type, "element", schema_path, child_field_ids, - max_repeat + 1, max_define + 2); - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - std::move(child_writer), can_have_nulls); - } - if (type.id() == LogicalTypeId::MAP) { - // map type - // maps are stored as follows: - // group (MAP) { - // repeated group key_value { - // required key; - // value; - // } - // } - // top map element - duckdb_parquet::format::SchemaElement top_element; - top_element.repetition_type = null_type; - top_element.num_children = 1; - top_element.converted_type = ConvertedType::MAP; - top_element.__isset.repetition_type = true; - top_element.__isset.num_children = true; - top_element.__isset.converted_type = true; - top_element.__isset.type = false; - top_element.name = name; - if (field_id && field_id->set) { - top_element.__isset.field_id = true; - top_element.field_id = field_id->field_id; - } - schemas.push_back(std::move(top_element)); - schema_path.push_back(name); - - // key_value element - duckdb_parquet::format::SchemaElement kv_element; - kv_element.repetition_type = FieldRepetitionType::REPEATED; - kv_element.num_children = 2; - kv_element.__isset.repetition_type = true; - kv_element.__isset.num_children = true; - kv_element.__isset.type = false; - kv_element.name = "key_value"; - schemas.push_back(std::move(kv_element)); - schema_path.emplace_back("key_value"); - - // construct the child types recursively - vector kv_types {MapType::KeyType(type), MapType::ValueType(type)}; - vector kv_names {"key", "value"}; - vector> child_writers; - child_writers.reserve(2); - for (idx_t i = 0; i < 2; i++) { - // key needs to be marked as REQUIRED - bool is_key = i == 0; - auto child_writer = CreateWriterRecursive(schemas, writer, kv_types[i], kv_names[i], schema_path, - child_field_ids, max_repeat + 1, max_define + 2, !is_key); - - child_writers.push_back(std::move(child_writer)); - } - auto struct_writer = make_uniq(writer, schema_idx, schema_path, max_repeat, max_define, - std::move(child_writers), can_have_nulls); - return make_uniq(writer, schema_idx, schema_path, max_repeat, max_define, - std::move(struct_writer), can_have_nulls); - } - duckdb_parquet::format::SchemaElement schema_element; - schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type); - schema_element.repetition_type = null_type; - schema_element.__isset.num_children = false; - schema_element.__isset.type = true; - schema_element.__isset.repetition_type = true; - schema_element.name = name; - if (field_id && field_id->set) { - schema_element.__isset.field_id = true; - schema_element.field_id = field_id->field_id; - } - ParquetWriter::SetSchemaProperties(type, schema_element); - schemas.push_back(std::move(schema_element)); - schema_path.push_back(name); - - switch (type.id()) { - case LogicalTypeId::BOOLEAN: - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::TINYINT: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::SMALLINT: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::INTEGER: - case LogicalTypeId::DATE: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::BIGINT: - case LogicalTypeId::TIME: - case LogicalTypeId::TIME_TZ: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIMESTAMP_MS: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::HUGEINT: - return make_uniq>( - writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls); - case LogicalTypeId::TIMESTAMP_NS: - return make_uniq>( - writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls); - case LogicalTypeId::TIMESTAMP_SEC: - return make_uniq>( - writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls); - case LogicalTypeId::UTINYINT: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::USMALLINT: - return make_uniq>(writer, schema_idx, std::move(schema_path), - max_repeat, max_define, can_have_nulls); - case LogicalTypeId::UINTEGER: - return make_uniq>(writer, schema_idx, std::move(schema_path), - max_repeat, max_define, can_have_nulls); - case LogicalTypeId::UBIGINT: - return make_uniq>(writer, schema_idx, std::move(schema_path), - max_repeat, max_define, can_have_nulls); - case LogicalTypeId::FLOAT: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::DOUBLE: - return make_uniq>(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - case LogicalTypeId::DECIMAL: - switch (type.InternalType()) { - case PhysicalType::INT16: - return make_uniq>(writer, schema_idx, std::move(schema_path), - max_repeat, max_define, can_have_nulls); - case PhysicalType::INT32: - return make_uniq>(writer, schema_idx, std::move(schema_path), - max_repeat, max_define, can_have_nulls); - case PhysicalType::INT64: - return make_uniq>(writer, schema_idx, std::move(schema_path), - max_repeat, max_define, can_have_nulls); - default: - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, - max_define, can_have_nulls); - } - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::UUID: - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::INTERVAL: - return make_uniq(writer, schema_idx, std::move(schema_path), max_repeat, max_define, - can_have_nulls); - case LogicalTypeId::ENUM: - return make_uniq(writer, type, schema_idx, std::move(schema_path), max_repeat, max_define, - can_have_nulls); - default: - throw InternalException("Unsupported type \"%s\" in Parquet writer", type.ToString()); - } -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/boolean_column_reader.hpp b/src/duckdb/extension/parquet/include/boolean_column_reader.hpp deleted file mode 100644 index 9410ee301..000000000 --- a/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +++ /dev/null @@ -1,64 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// boolean_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" -#include "templated_column_reader.hpp" - -namespace duckdb { - -struct BooleanParquetValueConversion; - -class BooleanColumnReader : public TemplatedColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::BOOL; - -public: - BooleanColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, - max_define_p, max_repeat_p), - byte_pos(0) {}; - - uint8_t byte_pos; - - void InitializeRead(idx_t row_group_idx_p, const vector &columns, TProtocol &protocol_p) override { - byte_pos = 0; - TemplatedColumnReader::InitializeRead(row_group_idx_p, columns, - protocol_p); - } - - void ResetPage() override { - byte_pos = 0; - } -}; - -struct BooleanParquetValueConversion { - static bool DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - throw std::runtime_error("Dicts for booleans make no sense"); - } - - static bool PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.available(1); - auto &byte_pos = reader.Cast().byte_pos; - bool ret = (*plain_data.ptr >> byte_pos) & 1; - byte_pos++; - if (byte_pos == 8) { - byte_pos = 0; - plain_data.inc(1); - } - return ret; - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - PlainRead(plain_data, reader); - } -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/callback_column_reader.hpp b/src/duckdb/extension/parquet/include/callback_column_reader.hpp deleted file mode 100644 index 45c3e726e..000000000 --- a/src/duckdb/extension/parquet/include/callback_column_reader.hpp +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// callback_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" -#include "templated_column_reader.hpp" -#include "parquet_reader.hpp" - -namespace duckdb { - -template -class CallbackColumnReader - : public TemplatedColumnReader> { - using BaseType = - TemplatedColumnReader>; - -public: - static constexpr const PhysicalType TYPE = PhysicalType::INVALID; - -public: - CallbackColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : TemplatedColumnReader>( - reader, std::move(type_p), schema_p, file_idx_p, max_define_p, max_repeat_p) { - } - -protected: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) { - BaseType::AllocateDict(num_entries * sizeof(DUCKDB_PHYSICAL_TYPE)); - auto dict_ptr = (DUCKDB_PHYSICAL_TYPE *)this->dict->ptr; - for (idx_t i = 0; i < num_entries; i++) { - dict_ptr[i] = FUNC(dictionary_data->read()); - } - } -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/cast_column_reader.hpp b/src/duckdb/extension/parquet/include/cast_column_reader.hpp deleted file mode 100644 index 640a77bda..000000000 --- a/src/duckdb/extension/parquet/include/cast_column_reader.hpp +++ /dev/null @@ -1,50 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// cast_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" -#include "templated_column_reader.hpp" - -namespace duckdb { - -//! A column reader that represents a cast over a child reader -class CastColumnReader : public ColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::INVALID; - -public: - CastColumnReader(unique_ptr child_reader, LogicalType target_type); - - unique_ptr child_reader; - DataChunk intermediate_chunk; - -public: - unique_ptr Stats(idx_t row_group_idx_p, const vector &columns) override; - void InitializeRead(idx_t row_group_idx_p, const vector &columns, TProtocol &protocol_p) override; - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out, - Vector &result) override; - - void Skip(idx_t num_values) override; - idx_t GroupRowsAvailable() override; - - uint64_t TotalCompressedSize() override { - return child_reader->TotalCompressedSize(); - } - - idx_t FileOffset() const override { - return child_reader->FileOffset(); - } - - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override { - child_reader->RegisterPrefetch(transport, allow_merge); - } -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/column_reader.hpp b/src/duckdb/extension/parquet/include/column_reader.hpp deleted file mode 100644 index 029b11036..000000000 --- a/src/duckdb/extension/parquet/include/column_reader.hpp +++ /dev/null @@ -1,188 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" -#include "parquet_dbp_decoder.hpp" -#include "parquet_rle_bp_decoder.hpp" -#include "parquet_statistics.hpp" -#include "parquet_types.h" -#include "resizable_buffer.hpp" -#include "thrift_tools.hpp" -#ifndef DUCKDB_AMALGAMATION - -#include "duckdb/common/operator/cast_operators.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/common/types/string_type.hpp" -#include "duckdb/common/types/vector.hpp" -#include "duckdb/common/types/vector_cache.hpp" -#endif - -namespace duckdb { -class ParquetReader; - -using duckdb_apache::thrift::protocol::TProtocol; - -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::SchemaElement; -using duckdb_parquet::format::Type; - -typedef std::bitset parquet_filter_t; - -class ColumnReader { -public: - ColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t file_idx_p, - idx_t max_define_p, idx_t max_repeat_p); - virtual ~ColumnReader(); - -public: - static unique_ptr CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t schema_idx_p, idx_t max_define, - idx_t max_repeat); - virtual void InitializeRead(idx_t row_group_index, const vector &columns, TProtocol &protocol_p); - virtual idx_t Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out, - Vector &result_out); - - virtual void Skip(idx_t num_values); - - ParquetReader &Reader(); - const LogicalType &Type() const; - const SchemaElement &Schema() const; - idx_t FileIdx() const; - idx_t MaxDefine() const; - idx_t MaxRepeat() const; - - virtual idx_t FileOffset() const; - virtual uint64_t TotalCompressedSize(); - virtual idx_t GroupRowsAvailable(); - - // register the range this reader will touch for prefetching - virtual void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge); - - virtual unique_ptr Stats(idx_t row_group_idx_p, const vector &columns); - - template - void PlainTemplated(shared_ptr plain_data, uint8_t *defines, uint64_t num_values, - parquet_filter_t &filter, idx_t result_offset, Vector &result) { - auto result_ptr = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - result_mask.SetInvalid(row_idx + result_offset); - continue; - } - if (filter[row_idx + result_offset]) { - VALUE_TYPE val = CONVERSION::PlainRead(*plain_data, *this); - result_ptr[row_idx + result_offset] = val; - } else { // there is still some data there that we have to skip over - CONVERSION::PlainSkip(*plain_data, *this); - } - } - } - -protected: - Allocator &GetAllocator(); - // readers that use the default Read() need to implement those - virtual void Plain(shared_ptr plain_data, uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result); - virtual void Dictionary(shared_ptr dictionary_data, idx_t num_entries); - virtual void Offsets(uint32_t *offsets, uint8_t *defines, idx_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result); - - // these are nops for most types, but not for strings - virtual void DictReference(Vector &result); - virtual void PlainReference(shared_ptr, Vector &result); - - virtual void PrepareDeltaLengthByteArray(ResizeableBuffer &buffer); - virtual void PrepareDeltaByteArray(ResizeableBuffer &buffer); - virtual void DeltaByteArray(uint8_t *defines, idx_t num_values, parquet_filter_t &filter, idx_t result_offset, - Vector &result); - - // applies any skips that were registered using Skip() - virtual void ApplyPendingSkips(idx_t num_values); - - bool HasDefines() { - return max_define > 0; - } - - bool HasRepeats() { - return max_repeat > 0; - } - -protected: - const SchemaElement &schema; - - idx_t file_idx; - idx_t max_define; - idx_t max_repeat; - - ParquetReader &reader; - LogicalType type; - unique_ptr byte_array_data; - idx_t byte_array_count = 0; - - idx_t pending_skips = 0; - - virtual void ResetPage(); - -private: - void AllocateBlock(idx_t size); - void AllocateCompressed(idx_t size); - void PrepareRead(parquet_filter_t &filter); - void PreparePage(PageHeader &page_hdr); - void PrepareDataPage(PageHeader &page_hdr); - void PreparePageV2(PageHeader &page_hdr); - void DecompressInternal(CompressionCodec::type codec, const_data_ptr_t src, idx_t src_size, data_ptr_t dst, - idx_t dst_size); - - const duckdb_parquet::format::ColumnChunk *chunk = nullptr; - - duckdb_apache::thrift::protocol::TProtocol *protocol; - idx_t page_rows_available; - idx_t group_rows_available; - idx_t chunk_read_offset; - - shared_ptr block; - - ResizeableBuffer compressed_buffer; - ResizeableBuffer offset_buffer; - - unique_ptr dict_decoder; - unique_ptr defined_decoder; - unique_ptr repeated_decoder; - unique_ptr dbp_decoder; - unique_ptr rle_decoder; - - // dummies for Skip() - parquet_filter_t none_filter; - ResizeableBuffer dummy_define; - ResizeableBuffer dummy_repeat; - -public: - template - TARGET &Cast() { - if (TARGET::TYPE != PhysicalType::INVALID && type.InternalType() != TARGET::TYPE) { - throw InternalException("Failed to cast column reader to type - type mismatch"); - } - return reinterpret_cast(*this); - } - - template - const TARGET &Cast() const { - if (TARGET::TYPE != PhysicalType::INVALID && type.InternalType() != TARGET::TYPE) { - throw InternalException("Failed to cast column reader to type - type mismatch"); - } - return reinterpret_cast(*this); - } -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/column_writer.hpp b/src/duckdb/extension/parquet/include/column_writer.hpp deleted file mode 100644 index 31b423ff7..000000000 --- a/src/duckdb/extension/parquet/include/column_writer.hpp +++ /dev/null @@ -1,120 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// column_writer.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" -#include "parquet_types.h" - -namespace duckdb { -class BufferedSerializer; -class ParquetWriter; -class ColumnWriterPageState; -class BasicColumnWriterState; -struct ChildFieldIDs; - -class ColumnWriterState { -public: - virtual ~ColumnWriterState(); - - vector definition_levels; - vector repetition_levels; - vector is_empty; - -public: - template - TARGET &Cast() { - D_ASSERT(dynamic_cast(this)); - return reinterpret_cast(*this); - } - template - const TARGET &Cast() const { - D_ASSERT(dynamic_cast(this)); - return reinterpret_cast(*this); - } -}; - -class ColumnWriterStatistics { -public: - virtual ~ColumnWriterStatistics(); - - virtual string GetMin(); - virtual string GetMax(); - virtual string GetMinValue(); - virtual string GetMaxValue(); - -public: - template - TARGET &Cast() { - D_ASSERT(dynamic_cast(this)); - return reinterpret_cast(*this); - } - template - const TARGET &Cast() const { - D_ASSERT(dynamic_cast(this)); - return reinterpret_cast(*this); - } -}; - -class ColumnWriter { - -public: - ColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector schema_path, idx_t max_repeat, - idx_t max_define, bool can_have_nulls); - virtual ~ColumnWriter(); - - ParquetWriter &writer; - idx_t schema_idx; - vector schema_path; - idx_t max_repeat; - idx_t max_define; - bool can_have_nulls; - // collected stats - idx_t null_count; - -public: - //! Create the column writer for a specific type recursively - static unique_ptr CreateWriterRecursive(vector &schemas, - ParquetWriter &writer, const LogicalType &type, - const string &name, vector schema_path, - optional_ptr field_ids, - idx_t max_repeat = 0, idx_t max_define = 1, - bool can_have_nulls = true); - - virtual unique_ptr InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) = 0; - - //! indicates whether the write need to analyse the data before preparing it - virtual bool HasAnalyze() { - return false; - } - - virtual void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) { - throw NotImplementedException("Writer does not need analysis"); - } - - //! Called after all data has been passed to Analyze - virtual void FinalizeAnalyze(ColumnWriterState &state) { - throw NotImplementedException("Writer does not need analysis"); - } - - virtual void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) = 0; - - virtual void BeginWrite(ColumnWriterState &state) = 0; - virtual void Write(ColumnWriterState &state, Vector &vector, idx_t count) = 0; - virtual void FinalizeWrite(ColumnWriterState &state) = 0; - -protected: - void HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, ValidityMask &validity, idx_t count, - uint16_t define_value, uint16_t null_value); - void HandleRepeatLevels(ColumnWriterState &state_p, ColumnWriterState *parent, idx_t count, idx_t max_repeat); - - void CompressPage(BufferedSerializer &temp_writer, size_t &compressed_size, data_ptr_t &compressed_data, - unique_ptr &compressed_buf); -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/decode_utils.hpp b/src/duckdb/extension/parquet/include/decode_utils.hpp deleted file mode 100644 index 1fb6bbc22..000000000 --- a/src/duckdb/extension/parquet/include/decode_utils.hpp +++ /dev/null @@ -1,58 +0,0 @@ -#pragma once - -#include "resizable_buffer.hpp" - -namespace duckdb { -class ParquetDecodeUtils { - -public: - template - static T ZigzagToInt(const T n) { - return (n >> 1) ^ -(n & 1); - } - - static const uint64_t BITPACK_MASKS[]; - static const uint64_t BITPACK_MASKS_SIZE; - static const uint8_t BITPACK_DLEN; - - template - static uint32_t BitUnpack(ByteBuffer &buffer, uint8_t &bitpack_pos, T *dest, uint32_t count, uint8_t width) { - if (width >= ParquetDecodeUtils::BITPACK_MASKS_SIZE) { - throw InvalidInputException("The width (%d) of the bitpacked data exceeds the supported max width (%d), " - "the file might be corrupted.", - width, ParquetDecodeUtils::BITPACK_MASKS_SIZE); - } - auto mask = BITPACK_MASKS[width]; - - for (uint32_t i = 0; i < count; i++) { - T val = (buffer.get() >> bitpack_pos) & mask; - bitpack_pos += width; - while (bitpack_pos > BITPACK_DLEN) { - buffer.inc(1); - val |= (T(buffer.get()) << T(BITPACK_DLEN - (bitpack_pos - width))) & mask; - bitpack_pos -= BITPACK_DLEN; - } - dest[i] = val; - } - return count; - } - - template - static T VarintDecode(ByteBuffer &buf) { - T result = 0; - uint8_t shift = 0; - while (true) { - auto byte = buf.read(); - result |= T(byte & 127) << shift; - if ((byte & 128) == 0) { - break; - } - shift += 7; - if (shift > sizeof(T) * 8) { - throw std::runtime_error("Varint-decoding found too large number"); - } - } - return result; - } -}; -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/list_column_reader.hpp b/src/duckdb/extension/parquet/include/list_column_reader.hpp deleted file mode 100644 index 67565dfbf..000000000 --- a/src/duckdb/extension/parquet/include/list_column_reader.hpp +++ /dev/null @@ -1,60 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// list_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" -#include "templated_column_reader.hpp" - -namespace duckdb { - -class ListColumnReader : public ColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::LIST; - -public: - ListColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p, unique_ptr child_column_reader_p); - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out, - Vector &result_out) override; - - void ApplyPendingSkips(idx_t num_values) override; - - void InitializeRead(idx_t row_group_idx_p, const vector &columns, TProtocol &protocol_p) override { - child_column_reader->InitializeRead(row_group_idx_p, columns, protocol_p); - } - - idx_t GroupRowsAvailable() override { - return child_column_reader->GroupRowsAvailable() + overflow_child_count; - } - - uint64_t TotalCompressedSize() override { - return child_column_reader->TotalCompressedSize(); - } - - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override { - child_column_reader->RegisterPrefetch(transport, allow_merge); - } - -private: - unique_ptr child_column_reader; - ResizeableBuffer child_defines; - ResizeableBuffer child_repeats; - uint8_t *child_defines_ptr; - uint8_t *child_repeats_ptr; - - VectorCache read_cache; - Vector read_vector; - - parquet_filter_t child_filter; - - idx_t overflow_child_count; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp b/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp deleted file mode 100644 index 601147a6b..000000000 --- a/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +++ /dev/null @@ -1,125 +0,0 @@ -#pragma once -#include "decode_utils.hpp" - -namespace duckdb { -class DbpDecoder { -public: - DbpDecoder(data_ptr_t buffer, uint32_t buffer_len) : buffer_(buffer, buffer_len) { - // - // overall header - block_value_count = ParquetDecodeUtils::VarintDecode(buffer_); - miniblocks_per_block = ParquetDecodeUtils::VarintDecode(buffer_); - total_value_count = ParquetDecodeUtils::VarintDecode(buffer_); - start_value = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode(buffer_)); - - // some derivatives - D_ASSERT(miniblocks_per_block > 0); - values_per_miniblock = block_value_count / miniblocks_per_block; - miniblock_bit_widths = unique_ptr(new data_t[miniblocks_per_block]); - - // init state to something sane - values_left_in_block = 0; - values_left_in_miniblock = 0; - miniblock_offset = 0; - min_delta = 0; - bitpack_pos = 0; - is_first_value = true; - }; - - ByteBuffer BufferPtr() { - if (bitpack_pos != 0) { - buffer_.inc(1); - bitpack_pos = 0; - } - return buffer_; - } - - template - void GetBatch(data_ptr_t values_target_ptr, uint32_t batch_size) { - auto values = reinterpret_cast(values_target_ptr); - - if (batch_size == 0) { - return; - } - idx_t value_offset = 0; - - if (is_first_value) { - values[0] = start_value; - value_offset++; - is_first_value = false; - } - - if (total_value_count == 1) { // I guess it's a special case - if (batch_size > 1) { - throw std::runtime_error("DBP decode did not find enough values (have 1)"); - } - return; - } - - while (value_offset < batch_size) { - if (values_left_in_block == 0) { // need to open new block - if (bitpack_pos > 0) { // have to eat the leftovers if any - buffer_.inc(1); - } - min_delta = ParquetDecodeUtils::ZigzagToInt(ParquetDecodeUtils::VarintDecode(buffer_)); - for (idx_t miniblock_idx = 0; miniblock_idx < miniblocks_per_block; miniblock_idx++) { - miniblock_bit_widths[miniblock_idx] = buffer_.read(); - // TODO what happens if width is 0? - } - values_left_in_block = block_value_count; - miniblock_offset = 0; - bitpack_pos = 0; - values_left_in_miniblock = values_per_miniblock; - } - if (values_left_in_miniblock == 0) { - miniblock_offset++; - values_left_in_miniblock = values_per_miniblock; - } - - auto read_now = MinValue(values_left_in_miniblock, (idx_t)batch_size - value_offset); - ParquetDecodeUtils::BitUnpack(buffer_, bitpack_pos, &values[value_offset], read_now, - miniblock_bit_widths[miniblock_offset]); - for (idx_t i = value_offset; i < value_offset + read_now; i++) { - values[i] = ((i == 0) ? start_value : values[i - 1]) + min_delta + values[i]; - } - value_offset += read_now; - values_left_in_miniblock -= read_now; - values_left_in_block -= read_now; - } - - if (value_offset != batch_size) { - throw std::runtime_error("DBP decode did not find enough values"); - } - start_value = values[batch_size - 1]; - } - void Finalize() { - if (values_left_in_miniblock == 0) { - return; - } - auto data = unique_ptr(new uint32_t[values_left_in_miniblock]); - GetBatch(data_ptr_cast(data.get()), values_left_in_miniblock); - } - - uint64_t TotalValues() { - return total_value_count; - } - -private: - ByteBuffer buffer_; - idx_t block_value_count; - idx_t miniblocks_per_block; - idx_t total_value_count; - int64_t start_value; - idx_t values_per_miniblock; - - unique_ptr miniblock_bit_widths; - idx_t values_left_in_block; - idx_t values_left_in_miniblock; - idx_t miniblock_offset; - int64_t min_delta; - - bool is_first_value; - - uint8_t bitpack_pos; -}; -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp b/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp deleted file mode 100644 index 33b253538..000000000 --- a/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +++ /dev/null @@ -1,43 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_decimal_utils.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" -#include "templated_column_reader.hpp" - -namespace duckdb { - -class ParquetDecimalUtils { -public: - template - static PHYSICAL_TYPE ReadDecimalValue(const_data_ptr_t pointer, idx_t size) { - D_ASSERT(size <= sizeof(PHYSICAL_TYPE)); - PHYSICAL_TYPE res = 0; - - auto res_ptr = (uint8_t *)&res; - bool positive = (*pointer & 0x80) == 0; - - // numbers are stored as two's complement so some muckery is required - for (idx_t i = 0; i < size; i++) { - auto byte = *(pointer + (size - i - 1)); - res_ptr[i] = positive ? byte : byte ^ 0xFF; - } - if (!positive) { - res += 1; - return -res; - } - return res; - } - - static unique_ptr CreateReader(ParquetReader &reader, const LogicalType &type_p, - const SchemaElement &schema_p, idx_t file_idx_p, idx_t max_define, - idx_t max_repeat); -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_extension.hpp b/src/duckdb/extension/parquet/include/parquet_extension.hpp deleted file mode 100644 index d24eeb6a8..000000000 --- a/src/duckdb/extension/parquet/include/parquet_extension.hpp +++ /dev/null @@ -1,13 +0,0 @@ -#pragma once - -#include "duckdb.hpp" - -namespace duckdb { - -class ParquetExtension : public Extension { -public: - void Load(DuckDB &db) override; - std::string Name() override; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp b/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp deleted file mode 100644 index 01d316dc0..000000000 --- a/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +++ /dev/null @@ -1,44 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_file_metadata_cache.hpp -// -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/storage/object_cache.hpp" -#endif -#include "parquet_types.h" - -namespace duckdb { - -//! ParquetFileMetadataCache -class ParquetFileMetadataCache : public ObjectCacheEntry { -public: - ParquetFileMetadataCache() : metadata(nullptr) { - } - ParquetFileMetadataCache(unique_ptr file_metadata, time_t r_time) - : metadata(std::move(file_metadata)), read_time(r_time) { - } - - ~ParquetFileMetadataCache() override = default; - - //! Parquet file metadata - unique_ptr metadata; - - //! read time - time_t read_time; - -public: - static string ObjectType() { - return "parquet_metadata"; - } - - string GetObjectType() override { - return ObjectType(); - } -}; -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_metadata.hpp b/src/duckdb/extension/parquet/include/parquet_metadata.hpp deleted file mode 100644 index f3666b5e1..000000000 --- a/src/duckdb/extension/parquet/include/parquet_metadata.hpp +++ /dev/null @@ -1,26 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_metadata.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "parquet_reader.hpp" -#include "duckdb/function/function_set.hpp" - -namespace duckdb { - -class ParquetMetaDataFunction : public TableFunction { -public: - ParquetMetaDataFunction(); -}; - -class ParquetSchemaFunction : public TableFunction { -public: - ParquetSchemaFunction(); -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_reader.hpp b/src/duckdb/extension/parquet/include/parquet_reader.hpp deleted file mode 100644 index 6a1d5000b..000000000 --- a/src/duckdb/extension/parquet/include/parquet_reader.hpp +++ /dev/null @@ -1,152 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/planner/table_filter.hpp" -#include "duckdb/planner/filter/constant_filter.hpp" -#include "duckdb/planner/filter/null_filter.hpp" -#include "duckdb/planner/filter/conjunction_filter.hpp" -#include "duckdb/common/common.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/data_chunk.hpp" -#include "duckdb/common/multi_file_reader_options.hpp" -#include "duckdb/common/multi_file_reader.hpp" -#endif -#include "column_reader.hpp" -#include "parquet_file_metadata_cache.hpp" -#include "parquet_rle_bp_decoder.hpp" -#include "parquet_types.h" -#include "resizable_buffer.hpp" - -#include - -namespace duckdb_parquet { -namespace format { -class FileMetaData; -} -} // namespace duckdb_parquet - -namespace duckdb { -class Allocator; -class ClientContext; -class BaseStatistics; -class TableFilterSet; - -struct ParquetReaderPrefetchConfig { - // Percentage of data in a row group span that should be scanned for enabling whole group prefetch - static constexpr double WHOLE_GROUP_PREFETCH_MINIMUM_SCAN = 0.95; -}; - -struct ParquetReaderScanState { - vector group_idx_list; - int64_t current_group; - idx_t group_offset; - unique_ptr file_handle; - unique_ptr root_reader; - unique_ptr thrift_file_proto; - - bool finished; - SelectionVector sel; - - ResizeableBuffer define_buf; - ResizeableBuffer repeat_buf; - - bool prefetch_mode = false; - bool current_group_prefetched = false; -}; - -struct ParquetOptions { - explicit ParquetOptions() { - } - explicit ParquetOptions(ClientContext &context); - - bool binary_as_string = false; - bool file_row_number = false; - MultiFileReaderOptions file_options; - -public: - void Serialize(FieldWriter &writer) const; - void Deserialize(FieldReader &reader); - - void FormatSerialize(FormatSerializer &serializer) const; - static ParquetOptions FormatDeserialize(FormatDeserializer &deserializer); -}; - -class ParquetReader { -public: - ParquetReader(ClientContext &context, string file_name, ParquetOptions parquet_options); - ParquetReader(ClientContext &context, ParquetOptions parquet_options, - shared_ptr metadata); - ~ParquetReader(); - - FileSystem &fs; - Allocator &allocator; - string file_name; - vector return_types; - vector names; - shared_ptr metadata; - ParquetOptions parquet_options; - MultiFileReaderData reader_data; - unique_ptr root_reader; - -public: - void InitializeScan(ParquetReaderScanState &state, vector groups_to_read); - void Scan(ParquetReaderScanState &state, DataChunk &output); - - idx_t NumRows(); - idx_t NumRowGroups(); - - const duckdb_parquet::format::FileMetaData *GetFileMetadata(); - - unique_ptr ReadStatistics(const string &name); - static LogicalType DeriveLogicalType(const SchemaElement &s_ele, bool binary_as_string); - - FileHandle &GetHandle() { - return *file_handle; - } - - const string &GetFileName() { - return file_name; - } - const vector &GetNames() { - return names; - } - const vector &GetTypes() { - return return_types; - } - -private: - void InitializeSchema(); - bool ScanInternal(ParquetReaderScanState &state, DataChunk &output); - unique_ptr CreateReader(); - - unique_ptr CreateReaderRecursive(idx_t depth, idx_t max_define, idx_t max_repeat, - idx_t &next_schema_idx, idx_t &next_file_idx); - const duckdb_parquet::format::RowGroup &GetGroup(ParquetReaderScanState &state); - uint64_t GetGroupCompressedSize(ParquetReaderScanState &state); - idx_t GetGroupOffset(ParquetReaderScanState &state); - // Group span is the distance between the min page offset and the max page offset plus the max page compressed size - uint64_t GetGroupSpan(ParquetReaderScanState &state); - void PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t out_col_idx); - LogicalType DeriveLogicalType(const SchemaElement &s_ele); - - template - std::runtime_error FormatException(const string fmt_str, Args... params) { - return std::runtime_error("Failed to read Parquet file \"" + file_name + - "\": " + StringUtil::Format(fmt_str, params...)); - } - -private: - unique_ptr file_handle; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp b/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp deleted file mode 100644 index 125edf1dd..000000000 --- a/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +++ /dev/null @@ -1,120 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_rle_bp_decoder.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once -#include "parquet_types.h" -#include "thrift_tools.hpp" -#include "resizable_buffer.hpp" -#include "decode_utils.hpp" - -namespace duckdb { - -class RleBpDecoder { -public: - /// Create a decoder object. buffer/buffer_len is the decoded data. - /// bit_width is the width of each value (before encoding). - RleBpDecoder(data_ptr_t buffer, uint32_t buffer_len, uint32_t bit_width) - : buffer_(buffer, buffer_len), bit_width_(bit_width), current_value_(0), repeat_count_(0), literal_count_(0) { - if (bit_width >= 64) { - throw std::runtime_error("Decode bit width too large"); - } - byte_encoded_len = ((bit_width_ + 7) / 8); - max_val = (uint64_t(1) << bit_width_) - 1; - } - - template - void GetBatch(data_ptr_t values_target_ptr, uint32_t batch_size) { - auto values = reinterpret_cast(values_target_ptr); - uint32_t values_read = 0; - - while (values_read < batch_size) { - if (repeat_count_ > 0) { - int repeat_batch = MinValue(batch_size - values_read, static_cast(repeat_count_)); - std::fill(values + values_read, values + values_read + repeat_batch, static_cast(current_value_)); - repeat_count_ -= repeat_batch; - values_read += repeat_batch; - } else if (literal_count_ > 0) { - uint32_t literal_batch = MinValue(batch_size - values_read, static_cast(literal_count_)); - uint32_t actual_read = ParquetDecodeUtils::BitUnpack(buffer_, bitpack_pos, values + values_read, - literal_batch, bit_width_); - if (literal_batch != actual_read) { - throw std::runtime_error("Did not find enough values"); - } - literal_count_ -= literal_batch; - values_read += literal_batch; - } else { - if (!NextCounts()) { - if (values_read != batch_size) { - throw std::runtime_error("RLE decode did not find enough values"); - } - return; - } - } - } - if (values_read != batch_size) { - throw std::runtime_error("RLE decode did not find enough values"); - } - } - - static uint8_t ComputeBitWidth(idx_t val) { - if (val == 0) { - return 0; - } - uint8_t ret = 1; - while (((idx_t)(1u << ret) - 1) < val) { - ret++; - } - return ret; - } - -private: - ByteBuffer buffer_; - - /// Number of bits needed to encode the value. Must be between 0 and 64. - uint32_t bit_width_; - uint64_t current_value_; - uint32_t repeat_count_; - uint32_t literal_count_; - uint8_t byte_encoded_len; - uint64_t max_val; - - uint8_t bitpack_pos = 0; - - /// Fills literal_count_ and repeat_count_ with next values. Returns false if there - /// are no more. - template - bool NextCounts() { - // Read the next run's indicator int, it could be a literal or repeated run. - // The int is encoded as a vlq-encoded value. - if (bitpack_pos != 0) { - buffer_.inc(1); - bitpack_pos = 0; - } - auto indicator_value = ParquetDecodeUtils::VarintDecode(buffer_); - - // lsb indicates if it is a literal run or repeated run - bool is_literal = indicator_value & 1; - if (is_literal) { - literal_count_ = (indicator_value >> 1) * 8; - } else { - repeat_count_ = indicator_value >> 1; - // (ARROW-4018) this is not big-endian compatible, lol - current_value_ = 0; - for (auto i = 0; i < byte_encoded_len; i++) { - current_value_ |= (buffer_.read() << (i * 8)); - } - // sanity check - if (repeat_count_ > 0 && current_value_ > max_val) { - throw std::runtime_error("Payload value bigger than allowed. Corrupted file?"); - } - } - // TODO complain if we run out of buffer - return true; - } -}; -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp b/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp deleted file mode 100644 index 1fc3b6dea..000000000 --- a/src/duckdb/extension/parquet/include/parquet_rle_bp_encoder.hpp +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_rle_bp_encoder.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "parquet_types.h" -#include "thrift_tools.hpp" -#include "resizable_buffer.hpp" - -namespace duckdb { - -class RleBpEncoder { -public: - RleBpEncoder(uint32_t bit_width); - -public: - //! NOTE: Prepare is only required if a byte count is required BEFORE writing - //! This is the case with e.g. writing repetition/definition levels - //! If GetByteCount() is not required, prepare can be safely skipped - void BeginPrepare(uint32_t first_value); - void PrepareValue(uint32_t value); - void FinishPrepare(); - - void BeginWrite(Serializer &writer, uint32_t first_value); - void WriteValue(Serializer &writer, uint32_t value); - void FinishWrite(Serializer &writer); - - idx_t GetByteCount(); - -private: - //! meta information - uint32_t byte_width; - //! RLE run information - idx_t byte_count; - idx_t run_count; - idx_t current_run_count; - uint32_t last_value; - -private: - void FinishRun(); - void WriteRun(Serializer &writer); -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_statistics.hpp b/src/duckdb/extension/parquet/include/parquet_statistics.hpp deleted file mode 100644 index 23d5cf0dc..000000000 --- a/src/duckdb/extension/parquet/include/parquet_statistics.hpp +++ /dev/null @@ -1,25 +0,0 @@ -#pragma once - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/storage/statistics/base_statistics.hpp" -#endif -#include "parquet_types.h" - -namespace duckdb { - -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::SchemaElement; - -struct LogicalType; - -struct ParquetStatisticsUtils { - - static unique_ptr TransformColumnStatistics(const SchemaElement &s_ele, const LogicalType &type, - const ColumnChunk &column_chunk); - - static Value ConvertValue(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele, - const std::string &stats); -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_support.hpp b/src/duckdb/extension/parquet/include/parquet_support.hpp deleted file mode 100644 index 91c43fcb4..000000000 --- a/src/duckdb/extension/parquet/include/parquet_support.hpp +++ /dev/null @@ -1,621 +0,0 @@ -#pragma once - -namespace duckdb { - -class StripeStreams { -public: - virtual ~StripeStreams() = default; - - /** - * get column selector for current stripe reading session - * @return column selector will hold column projection info - */ - virtual const dwio::common::ColumnSelector &getColumnSelector() const = 0; - - // Get row reader options - virtual const dwio::common::RowReaderOptclass StripeStreams { - public: - virtual ~StripeStreams() = default; - - /** - * get column selector for current stripe reading session - * @return column selector will hold column projection info - */ - virtual const dwio::common::ColumnSelector &getColumnSelector() const = 0; - - // Get row reader options - virtual const dwio::common::RowReaderOptions &getRowReaderOptions() const = 0; - - /** - * Get the encoding for the given column for this stripe. - */ - virtual const proto::ColumnEncoding &getEncoding(const EncodingKey &) const = 0; - - /** - * Get the stream for the given column/kind in this stripe. - * @param streamId stream identifier object - * @param throwIfNotFound fail if a stream is required and not found - * @return the new stream - */ - virtual unique_ptr getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0; - - /** - * visit all streams of given node and execute visitor logic - * return number of streams visited - */ - virtual uint32_t visitStreamsOfNode(uint32_t node, std::function visitor) - const = 0; - - /** - * Get the value of useVInts for the given column in this stripe. - * Defaults to true. - * @param streamId stream identifier - */ - virtual bool getUseVInts(const StreamIdentifier &streamId) const = 0; - - /** - * Get the memory pool for this reader. - */ - virtual memory::MemoryPool &getMemoryPool() const = 0; - - /** - * Get the RowGroupIndex. - * @return a vector of RowIndex belonging to the stripe - */ - virtual unique_ptr getRowGroupIndex(const StreamIdentifier &si) const = 0; - - /** - * Get stride index provider which is used by string dictionary reader to - * get the row index stride index where next() happens - */ - virtual const StrideIndexProvider &getStrideIndexProvider() const = 0; - } - ions &getRowReaderOptions() const = 0; - - /** - * Get the encoding for the given column for this stripe. - */ - virtual const proto::ColumnEncoding &getEncoding(const EncodingKey &) const = 0; - - /** - * Get the stream for the given column/kind in this stripe. - * @param streamId stream identifier object - * @param throwIfNotFound fail if a stream is required and not found - * @return the new stream - */ - virtual unique_ptr getStream(const StreamIdentifier &si, bool throwIfNotFound) const = 0; - - /** - * visit all streams of given node and execute visitor logic - * return number of streams visited - */ - virtual uint32_t visitStreamsOfNode(uint32_t node, - std::function visitor) const = 0; - - /** - * Get the value of useVInts for the given column in this stripe. - * Defaults to true. - * @param streamId stream identifier - */ - virtual bool getUseVInts(const StreamIdentifier &streamId) const = 0; - - /** - * Get the memory pool for this reader. - */ - virtual memory::MemoryPool &getMemoryPool() const = 0; - - /** - * Get the RowGroupIndex. - * @return a vector of RowIndex belonging to the stripe - */ - virtual unique_ptr getRowGroupIndex(const StreamIdentifier &si) const = 0; - - /** - * Get stride index provider which is used by string dictionary reader to - * get the row index stride index where next() happens - */ - virtual const StrideIndexProvider &getStrideIndexProvider() const = 0; -}; - -class ColumnReader { - -public: - ColumnReader(const EncodingKey &ek, StripeStreams &stripe); - - virtual ~ColumnReader() = default; - - /** - * Skip number of specified rows. - * @param numValues the number of values to skip - * @return the number of non-null values skipped - */ - virtual uint64_t skip(uint64_t numValues); - - /** - * Read the next group of values into a RowVector. - * @param numValues the number of values to read - * @param vector to read into - */ - virtual void next(uint64_t numValues, VectorPtr &result, const uint64_t *nulls = nullptr) = 0; -}; - -class SelectiveColumnReader : public ColumnReader { -public: - static constexpr uint64_t kStringBufferSize = 16 * 1024; - - SelectiveColumnReader(const EncodingKey &ek, StripeStreams &stripe, common::ScanSpec *scanSpec); - - /** - * Read the next group of values into a RowVector. - * @param numValues the number of values to read - * @param vector to read into - */ - void next(uint64_t /*numValues*/, VectorPtr & /*result*/, const uint64_t * /*incomingNulls*/) override { - DATALIB_CHECK(false) << "next() is only defined in SelectiveStructColumnReader"; - } - - // Creates a reader for the given stripe. - static unique_ptr build(const std::shared_ptr &requestedType, - const std::shared_ptr &dataType, - StripeStreams &stripe, common::ScanSpec *scanSpec, - uint32_t sequence = 0); - - // Seeks to offset and reads the rows in 'rows' and applies - // filters and value processing as given by 'scanSpec supplied at - // construction. 'offset' is relative to start of stripe. 'rows' are - // relative to 'offset', so that row 0 is the 'offset'th row from - // start of stripe. 'rows' is expected to stay constant - // between this and the next call to read. - virtual void read(vector_size_t offset, RowSet rows, const uint64_t *incomingNulls) = 0; - - // Extracts the values at 'rows' into '*result'. May rewrite or - // reallocate '*result'. 'rows' must be the same set or a subset of - // 'rows' passed to the last 'read(). - virtual void getValues(RowSet rows, VectorPtr *result) = 0; - - // Returns the rows that were selected/visited by the last - // read(). If 'this' has no filter, returns 'rows' passed to last - // read(). - const RowSet outputRows() const { - if (scanSpec_->hasFilter()) { - return outputRows_; - } - return inputRows_; - } - - // Advances to 'offset', so that the next item to be read is the - // offset-th from the start of stripe. - void seekTo(vector_size_t offset, bool readsNullsOnly); - - // The below functions are called from ColumnVisitor to fill the result set. - inline void addOutputRow(vector_size_t row) { - outputRows_.push_back(row); - } - - template - inline void addNull() { - DATALIB_DCHECK(rawResultNulls_ && rawValues_ && (numValues_ + 1) * sizeof(T) < rawSize_); - - anyNulls_ = true; - bits::setBit(rawResultNulls_, numValues_); - reinterpret_cast(rawValues_)[numValues_] = T(); - numValues_++; - } - - template - inline void addValue(const T value) { - // @lint-ignore-every HOWTOEVEN ConstantArgumentPassByValue - static_assert(std::is_pod::value, "General case of addValue is only for primitive types"); - DATALIB_DCHECK(rawValues_ && (numValues _ + 1) * sizeof(T) < rawSize_); - reinterpret_cast(rawValues_)[numValues_] = value; - numValues_++; - } - - void dropResults(vector_size_t count) { - outputRows_.resize(outputRows_.size() - count); - numValues_ -= count; - } - - common::ScanSpec *scanSpec() const { - return scanSpec_; - } - - auto readOffset() const { - return readOffset_; - } - - void setReadOffset(vector_size_t readOffset) { - readOffset_ = readOffset; - } - -protected: - static constexpr int8_t kNoValueSize = -1; - - template - void ensureValuesCapacity(vector_size_t numRows); - - void prepareNulls(vector_size_t numRows, bool needNulls); - - template - void filterNulls(RowSet rows, bool isNull, bool extractValues); - - template - void prepareRead(vector_size_t offset, RowSet rows, const uint64_t *incomingNulls); - - void setOutputRows(RowSet rows) { - outputRows_.resize(rows.size()); - if (!rows.size()) { - return; - } - memcpy(outputRows_.data(), &rows[0], rows.size() * sizeof(vector_size_t)); - } - template - void getFlatValues(RowSet rows, VectorPtr *result); - - template - void compactScalarValues(RowSet rows); - - void addStringValue(folly::StringPiece value); - - // Specification of filters, value extraction, pruning etc. The - // spec is assigned at construction and the contents may change at - // run time based on adaptation. Owned by caller. - common::ScanSpec *const scanSpec_; - // Row number after last read row, relative to stripe start. - vector_size_t readOffset_ = 0; - // The rows to process in read(). References memory supplied by - // caller. The values must remain live until the next call to read(). - RowSet inputRows_; - // Rows passing the filter in readWithVisitor. Must stay - // constant between consecutive calls to read(). - vector outputRows_; - // The row number corresponding to each element in 'values_' - vector valueRows_; - // The set of all nulls in the range of read(). Created when first - // needed and then reused. Not returned to callers. - BufferPtr nullsInReadRange_; - // Nulls buffer for readWithVisitor. Not set if no nulls. 'numValues' - // is the index of the first non-set bit. - BufferPtr resultNulls_; - uint64_t *rawResultNulls_ = nullptr; - // Buffer for gathering scalar values in readWithVisitor. - BufferPtr values_; - // Writable content in 'values' - void *rawValues_ = nullptr; - vector_size_t numValues_ = 0; - // Size of fixed width value in 'rawValues'. For integers, values - // are read at 64 bit width and can be compacted or extracted at a - // different width. - int8_t valueSize_ = kNoValueSize; - // Buffers backing the StringViews in 'values' when reading strings. - vector stringBuffers_; - // Writable contents of 'stringBuffers_.back()'. - char *rawStringBuffer_ = nullptr; - // Total writable bytes in 'rawStringBuffer_'. - int32_t rawStringSize_ = 0; - // Number of written bytes in 'rawStringBuffer_'. - uint32_t rawStringUsed_ = 0; - - // True if last read() added any nulls. - bool anyNulls_ = false; - // True if all values in scope for last read() are null. - bool allNull_ = false; -}; - -struct ExtractValues { - static constexpr bool kSkipNulls = false; - - bool acceptsNulls() const { - return true; - } - - template - void addValue(vector_size_t /*rowIndex*/, V /*value*/) { - } - void addNull(vector_size_t /*rowIndex*/) { - } -}; - -class Filter { -protected: - Filter(bool deterministic, bool nullAllowed, FilterKind kind) - : nullAllowed_(nullAllowed), deterministic_(deterministic), kind_(kind) { - } - -public: - virtual ~Filter() = default; - - // Templates parametrized on filter need to know determinism at compile - // time. If this is false, deterministic() will be consulted at - // runtime. - static constexpr bool deterministic = true; - - FilterKind kind() const { - return kind_; - } - - virtual unique_ptr clone() const = 0; - - /** - * A filter becomes non-deterministic when applies to nested column, - * e.g. a[1] > 10 is non-deterministic because > 10 filter applies only to - * some positions, e.g. first entry in a set of entries that correspond to a - * single top-level position. - */ - virtual bool isDeterministic() const { - return deterministic_; - } - - /** - * When a filter applied to a nested column fails, the whole top-level - * position should fail. To enable this functionality, the filter keeps track - * of the boundaries of top-level positions and allows the caller to find out - * where the current top-level position started and how far it continues. - * @return number of positions from the start of the current top-level - * position up to the current position (excluding current position) - */ - virtual int getPrecedingPositionsToFail() const { - return 0; - } - - /** - * @return number of positions remaining until the end of the current - * top-level position - */ - virtual int getSucceedingPositionsToFail() const { - return 0; - } - - virtual bool testNull() const { - return nullAllowed_; - } - - /** - * Used to apply is [not] null filters to complex types, e.g. - * a[1] is null AND a[3] is not null, where a is an array(array(T)). - * - * In these case, the exact values are not known, but it is known whether they - * are null or not. Furthermore, for some positions only nulls are allowed - * (a[1] is null), for others only non-nulls (a[3] is not null), and for the - * rest both are allowed (a[2] and a[N], where N > 3). - */ - virtual bool testNonNull() const { - DWIO_RAISE("not supported"); - } - - virtual bool testInt64(int64_t /* unused */) const { - DWIO_RAISE("not supported"); - } - - virtual bool testDouble(double /* unused */) const { - DWIO_RAISE("not supported"); - } - - virtual bool testFloat(float /* unused */) const { - DWIO_RAISE("not supported"); - } - - virtual bool testBool(bool /* unused */) const { - DWIO_RAISE("not supported"); - } - - virtual bool testBytes(const char * /* unused */, int32_t /* unused */) const { - DWIO_RAISE("not supported"); - } - - /** - * Filters like string equality and IN, as well as conditions on cardinality - * of lists and maps can be at least partly decided by looking at lengths - * alone. If this is false, then no further checks are needed. If true, - * eventual filters on the data itself need to be evaluated. - */ - virtual bool testLength(int32_t /* unused */) const { - DWIO_RAISE("not supported"); - } - -protected: - const bool nullAllowed_; - -private: - const bool deterministic_; - const FilterKind kind_; -}; - -// Template parameter for controlling filtering and action on a set of rows. -template -class ColumnVisitor { -public: - using FilterType = TFilter; - static constexpr bool dense = isDense; - ColumnVisitor(TFilter &filter, SelectiveColumnReader *reader, const RowSet &rows, ExtractValues values) - : filter_(filter), reader_(reader), allowNulls_(!TFilter::deterministic || filter.testNull()), rows_(&rows[0]), - numRows_(rows.size()), rowIndex_(0), values_(values) { - } - - bool allowNulls() { - if (ExtractValues::kSkipNulls && TFilter::deterministic) { - return false; - } - return allowNulls_ && values_.acceptsNulls(); - } - - vector_size_t start() { - return isDense ? 0 : rowAt(0); - } - - // Tests for a null value and processes it. If the value is not - // null, returns 0 and has no effect. If the value is null, advances - // to the next non-null value in 'rows_'. Returns the number of - // values (not including nulls) to skip to get to the next non-null. - // If there is no next non-null in 'rows_', sets 'atEnd'. If 'atEnd' - // is set and a non-zero skip is returned, the caller must perform - // the skip before returning. - FOLLY_ALWAYS_INLINE vector_size_t checkAndSkipNulls(const uint64_t *nulls, vector_size_t ¤t, bool &atEnd) { - auto testRow = currentRow(); - // Check that the caller and the visitor are in sync about current row. - DATALIB_DCHECK(current == testRow); - uint32_t nullIndex = testRow >> 6; - uint64_t nullWord = nulls[nullIndex]; - if (!nullWord) { - return 0; - } - uint8_t nullBit = testRow & 63; - if ((nullWord & (1UL << nullBit)) == 0) { - return 0; - } - // We have a null. We find the next non-null. - if (++rowIndex_ >= numRows_) { - atEnd = true; - return 0; - } - auto rowOfNullWord = testRow - nullBit; - if (isDense) { - if (nullBit == 63) { - nullBit = 0; - rowOfNullWord += 64; - nullWord = nulls[++nullIndex]; - } else { - ++nullBit; - // set all the bits below the row to null. - nullWord |= f4d::bits::lowMask(nullBit); - } - for (;;) { - auto nextNonNull = count_trailing_zeros(~nullWord); - if (rowOfNullWord + nextNonNull >= numRows_) { - // Nulls all the way to the end. - atEnd = true; - return 0; - } - if (nextNonNull < 64) { - DATALIB_CHECK(rowIndex_ <= rowOfNullWord + nextNonNull); - rowIndex_ = rowOfNullWord + nextNonNull; - current = currentRow(); - return 0; - } - rowOfNullWord += 64; - nullWord = nulls[++nullIndex]; - } - } else { - // Sparse row numbers. We find the first non-null and count - // how many non-nulls on rows not in 'rows_' we skipped. - int32_t toSkip = 0; - nullWord |= f4d::bits::lowMask(nullBit); - for (;;) { - testRow = currentRow(); - while (testRow >= rowOfNullWord + 64) { - toSkip += __builtin_popcountll(~nullWord); - nullWord = nulls[++nullIndex]; - rowOfNullWord += 64; - } - // testRow is inside nullWord. See if non-null. - nullBit = testRow & 63; - if ((nullWord & (1UL << nullBit)) == 0) { - toSkip += __builtin_popcountll(~nullWord & f4d::bits::lowMask(nullBit)); - current = testRow; - return toSkip; - } - if (++rowIndex_ >= numRows_) { - // We end with a null. Add the non-nulls below the final null. - toSkip += __builtin_popcountll(~nullWord & f4d::bits::lowMask(testRow - rowOfNullWord)); - atEnd = true; - return toSkip; - } - } - } - } - - vector_size_t processNull(bool &atEnd) { - vector_size_t previous = currentRow(); - if (filter_.testNull()) { - filterPassedForNull(); - } else { - filterFailed(); - } - if (++rowIndex_ >= numRows_) { - atEnd = true; - return rows_[numRows_ - 1] - previous; - } - if (TFilter::deterministic && isDense) { - return 0; - } - return currentRow() - previous - 1; - } - - FOLLY_ALWAYS_INLINE vector_size_t process(T value, bool &atEnd) { - if (!TFilter::deterministic) { - auto previous = currentRow(); - if (common::applyFilter(filter_, value)) { - filterPassed(value); - } else { - filterFailed(); - } - if (++rowIndex_ >= numRows_) { - atEnd = true; - return rows_[numRows_ - 1] - previous; - } - return currentRow() - previous - 1; - } - // The filter passes or fails and we go to the next row if any. - if (common::applyFilter(filter_, value)) { - filterPassed(value); - } else { - filterFailed(); - } - if (++rowIndex_ >= numRows_) { - atEnd = true; - return 0; - } - if (isDense) { - return 0; - } - return currentRow() - rows_[rowIndex_ - 1] - 1; - } - - inline vector_size_t rowAt(vector_size_t index) { - if (isDense) { - return index; - } - return rows_[index]; - } - - vector_size_t currentRow() { - if (isDense) { - return rowIndex_; - } - return rows_[rowIndex_]; - } - - vector_size_t numRows() { - return numRows_; - } - - void filterPassed(T value) { - addResult(value); - if (!std::is_same::value) { - addOutputRow(currentRow()); - } - } - - inline void filterPassedForNull() { - addNull(); - if (!std::is_same::value) { - addOutputRow(currentRow()); - } - } - - FOLLY_ALWAYS_INLINE void filterFailed(); - inline void addResult(T value); - inline void addNull(); - inline void addOutputRow(vector_size_t row); - -protected: - TFilter &filter_; - SelectiveColumnReader *reader_; - const bool allowNulls_; - const vector_size_t *rows_; - vector_size_t numRows_; - vector_size_t rowIndex_; - ExtractValues values_; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_timestamp.hpp b/src/duckdb/extension/parquet/include/parquet_timestamp.hpp deleted file mode 100644 index 9aec990ff..000000000 --- a/src/duckdb/extension/parquet/include/parquet_timestamp.hpp +++ /dev/null @@ -1,30 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_timestamp.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" - -namespace duckdb { - -struct Int96 { - uint32_t value[3]; -}; - -timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts); -Int96 TimestampToImpalaTimestamp(timestamp_t &ts); -timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts); -timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts); -timestamp_t ParquetTimestampNsToTimestamp(const int64_t &raw_ts); -date_t ParquetIntToDate(const int32_t &raw_date); -dtime_t ParquetIntToTimeMs(const int32_t &raw_time); -dtime_t ParquetIntToTime(const int64_t &raw_time); -dtime_t ParquetIntToTimeNs(const int64_t &raw_time); -dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_time); - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/parquet_writer.hpp b/src/duckdb/extension/parquet/include/parquet_writer.hpp deleted file mode 100644 index 5ff38ee16..000000000 --- a/src/duckdb/extension/parquet/include/parquet_writer.hpp +++ /dev/null @@ -1,97 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// parquet_writer.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/common.hpp" -#include "duckdb/common/exception.hpp" -#include "duckdb/common/mutex.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#include "duckdb/common/types/column/column_data_collection.hpp" -#endif - -#include "column_writer.hpp" -#include "parquet_types.h" -#include "thrift/protocol/TCompactProtocol.h" - -namespace duckdb { -class FileSystem; -class FileOpener; - -struct PreparedRowGroup { - duckdb_parquet::format::RowGroup row_group; - vector> states; - vector> heaps; -}; - -struct FieldID; -struct ChildFieldIDs { - ChildFieldIDs(); - ChildFieldIDs Copy() const; - unique_ptr> ids; -}; - -struct FieldID { - static constexpr const auto DUCKDB_FIELD_ID = "__duckdb_field_id"; - FieldID(); - explicit FieldID(int32_t field_id); - FieldID Copy() const; - bool set; - int32_t field_id; - ChildFieldIDs child_field_ids; -}; - -class ParquetWriter { -public: - ParquetWriter(FileSystem &fs, string file_name, vector types, vector names, - duckdb_parquet::format::CompressionCodec::type codec, ChildFieldIDs field_ids); - -public: - void PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result); - void FlushRowGroup(PreparedRowGroup &row_group); - void Flush(ColumnDataCollection &buffer); - void Finalize(); - - static duckdb_parquet::format::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type); - static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::format::SchemaElement &schema_ele); - - duckdb_apache::thrift::protocol::TProtocol *GetProtocol() { - return protocol.get(); - } - duckdb_parquet::format::CompressionCodec::type GetCodec() { - return codec; - } - duckdb_parquet::format::Type::type GetType(idx_t schema_idx) { - return file_meta_data.schema[schema_idx].type; - } - BufferedFileWriter &GetWriter() { - return *writer; - } - - static bool TypeIsSupported(const LogicalType &type); - -private: - static bool DuckDBTypeToParquetTypeInternal(const LogicalType &duckdb_type, - duckdb_parquet::format::Type::type &type); - string file_name; - vector sql_types; - vector column_names; - duckdb_parquet::format::CompressionCodec::type codec; - ChildFieldIDs field_ids; - - unique_ptr writer; - shared_ptr protocol; - duckdb_parquet::format::FileMetaData file_meta_data; - std::mutex lock; - - vector> column_writers; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/resizable_buffer.hpp b/src/duckdb/extension/parquet/include/resizable_buffer.hpp deleted file mode 100644 index 39ee93388..000000000 --- a/src/duckdb/extension/parquet/include/resizable_buffer.hpp +++ /dev/null @@ -1,88 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// resizable_buffer.hpp -// -// -//===----------------------------------------------------------------------===// -#pragma once - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/allocator.hpp" -#endif - -#include - -namespace duckdb { - -class ByteBuffer { // on to the 10 thousandth impl -public: - ByteBuffer() {}; - ByteBuffer(data_ptr_t ptr, uint64_t len) : ptr(ptr), len(len) {}; - - data_ptr_t ptr = nullptr; - uint64_t len = 0; - -public: - void inc(uint64_t increment) { - available(increment); - len -= increment; - ptr += increment; - } - - template - T read() { - T val = get(); - inc(sizeof(T)); - return val; - } - - template - T get() { - available(sizeof(T)); - T val = Load(ptr); - return val; - } - - void copy_to(char *dest, uint64_t len) { - available(len); - std::memcpy(dest, ptr, len); - } - - void zero() { - std::memset(ptr, 0, len); - } - - void available(uint64_t req_len) { - if (req_len > len) { - throw std::runtime_error("Out of buffer"); - } - } -}; - -class ResizeableBuffer : public ByteBuffer { -public: - ResizeableBuffer() { - } - ResizeableBuffer(Allocator &allocator, uint64_t new_size) { - resize(allocator, new_size); - } - void resize(Allocator &allocator, uint64_t new_size) { - len = new_size; - if (new_size == 0) { - return; - } - if (new_size > alloc_len) { - alloc_len = NextPowerOfTwo(new_size); - allocated_data = allocator.Allocate(alloc_len); - ptr = allocated_data.get(); - } - } - -private: - AllocatedData allocated_data; - idx_t alloc_len = 0; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/row_number_column_reader.hpp b/src/duckdb/extension/parquet/include/row_number_column_reader.hpp deleted file mode 100644 index cdd5df1f3..000000000 --- a/src/duckdb/extension/parquet/include/row_number_column_reader.hpp +++ /dev/null @@ -1,55 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// row_number_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/limits.hpp" -#endif -#include "column_reader.hpp" -#include "templated_column_reader.hpp" - -namespace duckdb { - -//! Reads a file-absolute row number as a virtual column that's not actually stored in the file -class RowNumberColumnReader : public ColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::INT64; - -public: - RowNumberColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p); - -public: - idx_t Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out, - Vector &result) override; - - unique_ptr Stats(idx_t row_group_idx_p, const vector &columns) override; - - void InitializeRead(idx_t row_group_idx_p, const vector &columns, TProtocol &protocol_p) override; - - void Skip(idx_t num_values) override { - row_group_offset += num_values; - } - idx_t GroupRowsAvailable() override { - return NumericLimits::Maximum(); - }; - uint64_t TotalCompressedSize() override { - return 0; - } - idx_t FileOffset() const override { - return 0; - } - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override { - } - -private: - idx_t row_group_offset; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/string_column_reader.hpp b/src/duckdb/extension/parquet/include/string_column_reader.hpp deleted file mode 100644 index df2660155..000000000 --- a/src/duckdb/extension/parquet/include/string_column_reader.hpp +++ /dev/null @@ -1,50 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// string_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" - -namespace duckdb { - -struct StringParquetValueConversion { - static string_t DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader); - - static string_t PlainRead(ByteBuffer &plain_data, ColumnReader &reader); - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader); -}; - -class StringColumnReader : public TemplatedColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR; - -public: - StringColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p); - - unique_ptr dict_strings; - idx_t fixed_width_string_length; - idx_t delta_offset = 0; - -public: - void Dictionary(shared_ptr dictionary_data, idx_t num_entries) override; - - void PrepareDeltaLengthByteArray(ResizeableBuffer &buffer) override; - void PrepareDeltaByteArray(ResizeableBuffer &buffer) override; - void DeltaByteArray(uint8_t *defines, idx_t num_values, parquet_filter_t &filter, idx_t result_offset, - Vector &result) override; - static uint32_t VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar); - uint32_t VerifyString(const char *str_data, uint32_t str_len); - -protected: - void DictReference(Vector &result) override; - void PlainReference(shared_ptr plain_data, Vector &result) override; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/struct_column_reader.hpp b/src/duckdb/extension/parquet/include/struct_column_reader.hpp deleted file mode 100644 index b9a9b5eee..000000000 --- a/src/duckdb/extension/parquet/include/struct_column_reader.hpp +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// struct_column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" -#include "templated_column_reader.hpp" - -namespace duckdb { - -class StructColumnReader : public ColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::STRUCT; - -public: - StructColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p, vector> child_readers_p); - - vector> child_readers; - -public: - ColumnReader *GetChildReader(idx_t child_idx); - - void InitializeRead(idx_t row_group_idx_p, const vector &columns, TProtocol &protocol_p) override; - - idx_t Read(uint64_t num_values, parquet_filter_t &filter, data_ptr_t define_out, data_ptr_t repeat_out, - Vector &result) override; - - void Skip(idx_t num_values) override; - idx_t GroupRowsAvailable() override; - uint64_t TotalCompressedSize() override; - void RegisterPrefetch(ThriftFileTransport &transport, bool allow_merge) override; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/templated_column_reader.hpp b/src/duckdb/extension/parquet/include/templated_column_reader.hpp deleted file mode 100644 index 59a1c13c4..000000000 --- a/src/duckdb/extension/parquet/include/templated_column_reader.hpp +++ /dev/null @@ -1,99 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// templated__column_reader.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "column_reader.hpp" - -namespace duckdb { - -template -struct TemplatedParquetValueConversion { - static VALUE_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - D_ASSERT(offset < dict.len / sizeof(VALUE_TYPE)); - return ((VALUE_TYPE *)dict.ptr)[offset]; - } - - static VALUE_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - return plain_data.read(); - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(VALUE_TYPE)); - } -}; - -template -class TemplatedColumnReader : public ColumnReader { -public: - static constexpr const PhysicalType TYPE = PhysicalType::INVALID; - -public: - TemplatedColumnReader(ParquetReader &reader, LogicalType type_p, const SchemaElement &schema_p, idx_t schema_idx_p, - idx_t max_define_p, idx_t max_repeat_p) - : ColumnReader(reader, std::move(type_p), schema_p, schema_idx_p, max_define_p, max_repeat_p) {}; - - shared_ptr dict; - -public: - void AllocateDict(idx_t size) { - if (!dict) { - dict = make_shared(GetAllocator(), size); - } else { - dict->resize(GetAllocator(), size); - } - } - - void Dictionary(shared_ptr data, idx_t num_entries) override { - dict = std::move(data); - } - - void Offsets(uint32_t *offsets, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) override { - auto result_ptr = FlatVector::GetData(result); - auto &result_mask = FlatVector::Validity(result); - - idx_t offset_idx = 0; - for (idx_t row_idx = 0; row_idx < num_values; row_idx++) { - if (HasDefines() && defines[row_idx + result_offset] != max_define) { - result_mask.SetInvalid(row_idx + result_offset); - continue; - } - if (filter[row_idx + result_offset]) { - VALUE_TYPE val = VALUE_CONVERSION::DictRead(*dict, offsets[offset_idx++], *this); - result_ptr[row_idx + result_offset] = val; - } else { - offset_idx++; - } - } - } - - void Plain(shared_ptr plain_data, uint8_t *defines, uint64_t num_values, parquet_filter_t &filter, - idx_t result_offset, Vector &result) override { - PlainTemplated(std::move(plain_data), defines, num_values, filter, result_offset, - result); - } -}; - -template -struct CallbackParquetValueConversion { - static DUCKDB_PHYSICAL_TYPE DictRead(ByteBuffer &dict, uint32_t &offset, ColumnReader &reader) { - return TemplatedParquetValueConversion::DictRead(dict, offset, reader); - } - - static DUCKDB_PHYSICAL_TYPE PlainRead(ByteBuffer &plain_data, ColumnReader &reader) { - return FUNC(plain_data.read()); - } - - static void PlainSkip(ByteBuffer &plain_data, ColumnReader &reader) { - plain_data.inc(sizeof(PARQUET_PHYSICAL_TYPE)); - } -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/thrift_tools.hpp b/src/duckdb/extension/parquet/include/thrift_tools.hpp deleted file mode 100644 index 2306aa30b..000000000 --- a/src/duckdb/extension/parquet/include/thrift_tools.hpp +++ /dev/null @@ -1,207 +0,0 @@ -#pragma once -#include -#include "thrift/protocol/TCompactProtocol.h" -#include "thrift/transport/TBufferTransports.h" - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/allocator.hpp" -#endif - -namespace duckdb { - -// A ReadHead for prefetching data in a specific range -struct ReadHead { - ReadHead(idx_t location, uint64_t size) : location(location), size(size) {}; - // Hint info - idx_t location; - uint64_t size; - - // Current info - AllocatedData data; - bool data_isset = false; - - idx_t GetEnd() const { - return size + location; - } - - void Allocate(Allocator &allocator) { - data = allocator.Allocate(size); - } -}; - -// Comparator for ReadHeads that are either overlapping, adjacent, or within ALLOW_GAP bytes from each other -struct ReadHeadComparator { - static constexpr uint64_t ALLOW_GAP = 1 << 14; // 16 KiB - bool operator()(const ReadHead *a, const ReadHead *b) const { - auto a_start = a->location; - auto a_end = a->location + a->size; - auto b_start = b->location; - - if (a_end <= NumericLimits::Maximum() - ALLOW_GAP) { - a_end += ALLOW_GAP; - } - - return a_start < b_start && a_end < b_start; - } -}; - -// Two-step read ahead buffer -// 1: register all ranges that will be read, merging ranges that are consecutive -// 2: prefetch all registered ranges -struct ReadAheadBuffer { - ReadAheadBuffer(Allocator &allocator, FileHandle &handle) : allocator(allocator), handle(handle) { - } - - // The list of read heads - std::list read_heads; - // Set for merging consecutive ranges - std::set merge_set; - - Allocator &allocator; - FileHandle &handle; - - idx_t total_size = 0; - - // Add a read head to the prefetching list - void AddReadHead(idx_t pos, uint64_t len, bool merge_buffers = true) { - // Attempt to merge with existing - if (merge_buffers) { - ReadHead new_read_head {pos, len}; - auto lookup_set = merge_set.find(&new_read_head); - if (lookup_set != merge_set.end()) { - auto existing_head = *lookup_set; - auto new_start = MinValue(existing_head->location, new_read_head.location); - auto new_length = MaxValue(existing_head->GetEnd(), new_read_head.GetEnd()) - new_start; - existing_head->location = new_start; - existing_head->size = new_length; - return; - } - } - - read_heads.emplace_front(ReadHead(pos, len)); - total_size += len; - auto &read_head = read_heads.front(); - - if (merge_buffers) { - merge_set.insert(&read_head); - } - - if (read_head.GetEnd() > handle.GetFileSize()) { - throw std::runtime_error("Prefetch registered for bytes outside file"); - } - } - - // Returns the relevant read head - ReadHead *GetReadHead(idx_t pos) { - for (auto &read_head : read_heads) { - if (pos >= read_head.location && pos < read_head.GetEnd()) { - return &read_head; - } - } - return nullptr; - } - - // Prefetch all read heads - void Prefetch() { - for (auto &read_head : read_heads) { - read_head.Allocate(allocator); - - if (read_head.GetEnd() > handle.GetFileSize()) { - throw std::runtime_error("Prefetch registered requested for bytes outside file"); - } - - handle.Read(read_head.data.get(), read_head.size, read_head.location); - read_head.data_isset = true; - } - } -}; - -class ThriftFileTransport : public duckdb_apache::thrift::transport::TVirtualTransport { -public: - static constexpr uint64_t PREFETCH_FALLBACK_BUFFERSIZE = 1000000; - - ThriftFileTransport(Allocator &allocator, FileHandle &handle_p, bool prefetch_mode_p) - : handle(handle_p), location(0), allocator(allocator), ra_buffer(ReadAheadBuffer(allocator, handle_p)), - prefetch_mode(prefetch_mode_p) { - } - - uint32_t read(uint8_t *buf, uint32_t len) { - auto prefetch_buffer = ra_buffer.GetReadHead(location); - if (prefetch_buffer != nullptr && location - prefetch_buffer->location + len <= prefetch_buffer->size) { - D_ASSERT(location - prefetch_buffer->location + len <= prefetch_buffer->size); - - if (!prefetch_buffer->data_isset) { - prefetch_buffer->Allocate(allocator); - handle.Read(prefetch_buffer->data.get(), prefetch_buffer->size, prefetch_buffer->location); - prefetch_buffer->data_isset = true; - } - memcpy(buf, prefetch_buffer->data.get() + location - prefetch_buffer->location, len); - } else { - if (prefetch_mode && len < PREFETCH_FALLBACK_BUFFERSIZE && len > 0) { - Prefetch(location, MinValue(PREFETCH_FALLBACK_BUFFERSIZE, handle.GetFileSize() - location)); - auto prefetch_buffer_fallback = ra_buffer.GetReadHead(location); - D_ASSERT(location - prefetch_buffer_fallback->location + len <= prefetch_buffer_fallback->size); - memcpy(buf, prefetch_buffer_fallback->data.get() + location - prefetch_buffer_fallback->location, len); - } else { - handle.Read(buf, len, location); - } - } - location += len; - return len; - } - - // Prefetch a single buffer - void Prefetch(idx_t pos, uint64_t len) { - RegisterPrefetch(pos, len, false); - FinalizeRegistration(); - PrefetchRegistered(); - } - - // Register a buffer for prefixing - void RegisterPrefetch(idx_t pos, uint64_t len, bool can_merge = true) { - ra_buffer.AddReadHead(pos, len, can_merge); - } - - // Prevents any further merges, should be called before PrefetchRegistered - void FinalizeRegistration() { - ra_buffer.merge_set.clear(); - } - - // Prefetch all previously registered ranges - void PrefetchRegistered() { - ra_buffer.Prefetch(); - } - - void ClearPrefetch() { - ra_buffer.read_heads.clear(); - ra_buffer.merge_set.clear(); - } - - void SetLocation(idx_t location_p) { - location = location_p; - } - - idx_t GetLocation() { - return location; - } - idx_t GetSize() { - return handle.file_system.GetFileSize(handle); - } - -private: - FileHandle &handle; - idx_t location; - - Allocator &allocator; - - // Multi-buffer prefetch - ReadAheadBuffer ra_buffer; - - // Whether the prefetch mode is enabled. In this mode the DirectIO flag of the handle will be set and the parquet - // reader will manage the read buffering. - bool prefetch_mode; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/include/zstd_file_system.hpp b/src/duckdb/extension/parquet/include/zstd_file_system.hpp deleted file mode 100644 index 230aef36d..000000000 --- a/src/duckdb/extension/parquet/include/zstd_file_system.hpp +++ /dev/null @@ -1,31 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// zstd_file_system.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/compressed_file_system.hpp" -#endif - -namespace duckdb { - -class ZStdFileSystem : public CompressedFileSystem { -public: - unique_ptr OpenCompressedFile(unique_ptr handle, bool write) override; - - std::string GetName() const override { - return "ZStdFileSystem"; - } - - unique_ptr CreateStream() override; - idx_t InBufferSize() override; - idx_t OutBufferSize() override; -}; - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_extension.cpp b/src/duckdb/extension/parquet/parquet_extension.cpp deleted file mode 100644 index dd9112e31..000000000 --- a/src/duckdb/extension/parquet/parquet_extension.cpp +++ /dev/null @@ -1,1051 +0,0 @@ -#define DUCKDB_EXTENSION_MAIN - -#include "parquet_extension.hpp" - -#include "duckdb.hpp" -#include "parquet_metadata.hpp" -#include "parquet_reader.hpp" -#include "parquet_writer.hpp" -#include "zstd_file_system.hpp" - -#include -#include -#include -#include -#include -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/catalog/catalog.hpp" -#include "duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp" -#include "duckdb/common/constants.hpp" -#include "duckdb/common/enums/file_compression_type.hpp" -#include "duckdb/common/field_writer.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/multi_file_reader.hpp" -#include "duckdb/common/serializer/format_deserializer.hpp" -#include "duckdb/common/serializer/format_serializer.hpp" -#include "duckdb/common/types/chunk_collection.hpp" -#include "duckdb/function/copy_function.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/main/config.hpp" -#include "duckdb/main/extension_util.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" -#include "duckdb/parser/expression/function_expression.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#include "duckdb/parser/tableref/table_function_ref.hpp" -#include "duckdb/planner/operator/logical_get.hpp" -#include "duckdb/storage/statistics/base_statistics.hpp" -#include "duckdb/storage/table/row_group.hpp" - -#endif - -namespace duckdb { - -struct ParquetReadBindData : public TableFunctionData { - shared_ptr initial_reader; - vector files; - atomic chunk_count; - atomic cur_file; - vector names; - vector types; - - // The union readers are created (when parquet union_by_name option is on) during binding - // Those readers can be re-used during ParquetParallelStateNext - vector> union_readers; - - // These come from the initial_reader, but need to be stored in case the initial_reader is removed by a filter - idx_t initial_file_cardinality; - idx_t initial_file_row_groups; - ParquetOptions parquet_options; - MultiFileReaderBindData reader_bind; - - void Initialize(shared_ptr reader) { - initial_reader = std::move(reader); - initial_file_cardinality = initial_reader->NumRows(); - initial_file_row_groups = initial_reader->NumRowGroups(); - parquet_options = initial_reader->parquet_options; - } -}; - -struct ParquetReadLocalState : public LocalTableFunctionState { - shared_ptr reader; - ParquetReaderScanState scan_state; - bool is_parallel; - idx_t batch_index; - idx_t file_index; - //! The DataChunk containing all read columns (even filter columns that are immediately removed) - DataChunk all_columns; -}; - -enum class ParquetFileState : uint8_t { UNOPENED, OPENING, OPEN, CLOSED }; - -struct ParquetReadGlobalState : public GlobalTableFunctionState { - mutex lock; - - //! The initial reader from the bind phase - shared_ptr initial_reader; - //! Currently opened readers - vector> readers; - //! Flag to indicate a file is being opened - vector file_states; - //! Mutexes to wait for a file that is currently being opened - unique_ptr file_mutexes; - //! Signal to other threads that a file failed to open, letting every thread abort. - bool error_opening_file = false; - - //! Index of file currently up for scanning - idx_t file_index; - //! Index of row group within file currently up for scanning - idx_t row_group_index; - //! Batch index of the next row group to be scanned - idx_t batch_index; - - idx_t max_threads; - vector projection_ids; - vector scanned_types; - vector column_ids; - TableFilterSet *filters; - - idx_t MaxThreads() const override { - return max_threads; - } - - bool CanRemoveFilterColumns() const { - return !projection_ids.empty(); - } -}; - -struct ParquetWriteBindData : public TableFunctionData { - vector sql_types; - vector column_names; - duckdb_parquet::format::CompressionCodec::type codec = duckdb_parquet::format::CompressionCodec::SNAPPY; - idx_t row_group_size = RowGroup::ROW_GROUP_SIZE; - - //! If row_group_size_bytes is not set, we default to row_group_size * BYTES_PER_ROW - static constexpr const idx_t BYTES_PER_ROW = 1024; - idx_t row_group_size_bytes; - - ChildFieldIDs field_ids; -}; - -struct ParquetWriteGlobalState : public GlobalFunctionData { - unique_ptr writer; -}; - -struct ParquetWriteLocalState : public LocalFunctionData { - explicit ParquetWriteLocalState(ClientContext &context, const vector &types) - : buffer(context, types, ColumnDataAllocatorType::HYBRID) { - buffer.InitializeAppend(append_state); - } - - ColumnDataCollection buffer; - ColumnDataAppendState append_state; -}; - -void ParquetOptions::Serialize(FieldWriter &writer) const { - writer.WriteField(binary_as_string); - writer.WriteField(file_row_number); - writer.WriteSerializable(file_options); -} - -void ParquetOptions::Deserialize(FieldReader &reader) { - binary_as_string = reader.ReadRequired(); - file_row_number = reader.ReadRequired(); - file_options = reader.ReadRequiredSerializable(); -} - -BindInfo ParquetGetBatchInfo(const FunctionData *bind_data) { - auto bind_info = BindInfo(ScanType::PARQUET); - auto &parquet_bind = bind_data->Cast(); - vector file_path; - for (auto &path : parquet_bind.files) { - file_path.emplace_back(path); - } - // LCOV_EXCL_START - bind_info.InsertOption("file_path", Value::LIST(LogicalType::VARCHAR, file_path)); - bind_info.InsertOption("binary_as_string", Value::BOOLEAN(parquet_bind.parquet_options.binary_as_string)); - bind_info.InsertOption("file_row_number", Value::BOOLEAN(parquet_bind.parquet_options.file_row_number)); - parquet_bind.parquet_options.file_options.AddBatchInfo(bind_info); - // LCOV_EXCL_STOP - return bind_info; -} - -class ParquetScanFunction { -public: - static TableFunctionSet GetFunctionSet() { - TableFunction table_function("parquet_scan", {LogicalType::VARCHAR}, ParquetScanImplementation, ParquetScanBind, - ParquetScanInitGlobal, ParquetScanInitLocal); - table_function.statistics = ParquetScanStats; - table_function.cardinality = ParquetCardinality; - table_function.table_scan_progress = ParquetProgress; - table_function.named_parameters["binary_as_string"] = LogicalType::BOOLEAN; - table_function.named_parameters["file_row_number"] = LogicalType::BOOLEAN; - table_function.named_parameters["compression"] = LogicalType::VARCHAR; - MultiFileReader::AddParameters(table_function); - table_function.get_batch_index = ParquetScanGetBatchIndex; - table_function.serialize = ParquetScanSerialize; - table_function.deserialize = ParquetScanDeserialize; - table_function.format_serialize = ParquetScanFormatSerialize; - table_function.format_deserialize = ParquetScanFormatDeserialize; - table_function.get_batch_info = ParquetGetBatchInfo; - table_function.projection_pushdown = true; - table_function.filter_pushdown = true; - table_function.filter_prune = true; - table_function.pushdown_complex_filter = ParquetComplexFilterPushdown; - return MultiFileReader::CreateFunctionSet(table_function); - } - - static unique_ptr ParquetReadBind(ClientContext &context, CopyInfo &info, - vector &expected_names, - vector &expected_types) { - D_ASSERT(expected_names.size() == expected_types.size()); - ParquetOptions parquet_options(context); - - for (auto &option : info.options) { - auto loption = StringUtil::Lower(option.first); - if (loption == "compression" || loption == "codec" || loption == "row_group_size") { - // CODEC/COMPRESSION and ROW_GROUP_SIZE options have no effect on parquet read. - // These options are determined from the file. - continue; - } else if (loption == "binary_as_string") { - parquet_options.binary_as_string = true; - } else if (loption == "file_row_number") { - parquet_options.file_row_number = true; - } else { - throw NotImplementedException("Unsupported option for COPY FROM parquet: %s", option.first); - } - } - - auto files = MultiFileReader::GetFileList(context, Value(info.file_path), "Parquet"); - return ParquetScanBindInternal(context, std::move(files), expected_types, expected_names, parquet_options); - } - - static unique_ptr ParquetScanStats(ClientContext &context, const FunctionData *bind_data_p, - column_t column_index) { - auto &bind_data = bind_data_p->Cast(); - - if (IsRowIdColumnId(column_index)) { - return nullptr; - } - - // NOTE: we do not want to parse the Parquet metadata for the sole purpose of getting column statistics - - auto &config = DBConfig::GetConfig(context); - if (bind_data.files.size() < 2) { - if (bind_data.initial_reader) { - // most common path, scanning single parquet file - return bind_data.initial_reader->ReadStatistics(bind_data.names[column_index]); - } else if (!config.options.object_cache_enable) { - // our initial reader was reset - return nullptr; - } - } else if (config.options.object_cache_enable) { - // multiple files, object cache enabled: merge statistics - unique_ptr overall_stats; - - auto &cache = ObjectCache::GetObjectCache(context); - // for more than one file, we could be lucky and metadata for *every* file is in the object cache (if - // enabled at all) - FileSystem &fs = FileSystem::GetFileSystem(context); - - for (idx_t file_idx = 0; file_idx < bind_data.files.size(); file_idx++) { - auto &file_name = bind_data.files[file_idx]; - auto metadata = cache.Get(file_name); - if (!metadata) { - // missing metadata entry in cache, no usable stats - return nullptr; - } - auto handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); - // we need to check if the metadata cache entries are current - if (fs.GetLastModifiedTime(*handle) >= metadata->read_time) { - // missing or invalid metadata entry in cache, no usable stats overall - return nullptr; - } - ParquetReader reader(context, bind_data.parquet_options, metadata); - // get and merge stats for file - auto file_stats = reader.ReadStatistics(bind_data.names[column_index]); - if (!file_stats) { - return nullptr; - } - if (overall_stats) { - overall_stats->Merge(*file_stats); - } else { - overall_stats = std::move(file_stats); - } - } - // success! - return overall_stats; - } - - // multiple files and no object cache, no luck! - return nullptr; - } - - static unique_ptr ParquetScanBindInternal(ClientContext &context, vector files, - vector &return_types, vector &names, - ParquetOptions parquet_options) { - auto result = make_uniq(); - result->files = std::move(files); - result->reader_bind = - MultiFileReader::BindReader(context, result->types, result->names, *result, parquet_options); - if (return_types.empty()) { - // no expected types - just copy the types - return_types = result->types; - names = result->names; - } else { - if (return_types.size() != result->types.size()) { - throw std::runtime_error(StringUtil::Format( - "Failed to read file \"%s\" - column count mismatch: expected %d columns but found %d", - result->files[0], return_types.size(), result->types.size())); - } - // expected types - overwrite the types we want to read instead - result->types = return_types; - } - return std::move(result); - } - - static unique_ptr ParquetScanBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto files = MultiFileReader::GetFileList(context, input.inputs[0], "Parquet"); - ParquetOptions parquet_options(context); - for (auto &kv : input.named_parameters) { - auto loption = StringUtil::Lower(kv.first); - if (MultiFileReader::ParseOption(kv.first, kv.second, parquet_options.file_options, context)) { - continue; - } - if (loption == "binary_as_string") { - parquet_options.binary_as_string = BooleanValue::Get(kv.second); - } else if (loption == "file_row_number") { - parquet_options.file_row_number = BooleanValue::Get(kv.second); - } - } - parquet_options.file_options.AutoDetectHivePartitioning(files, context); - return ParquetScanBindInternal(context, std::move(files), return_types, names, parquet_options); - } - - static double ParquetProgress(ClientContext &context, const FunctionData *bind_data_p, - const GlobalTableFunctionState *global_state) { - auto &bind_data = bind_data_p->Cast(); - if (bind_data.files.empty()) { - return 100.0; - } - if (bind_data.initial_file_cardinality == 0) { - return (100.0 * (bind_data.cur_file + 1)) / bind_data.files.size(); - } - auto percentage = (bind_data.chunk_count * STANDARD_VECTOR_SIZE * 100.0 / bind_data.initial_file_cardinality) / - bind_data.files.size(); - percentage += 100.0 * bind_data.cur_file / bind_data.files.size(); - return percentage; - } - - static unique_ptr - ParquetScanInitLocal(ExecutionContext &context, TableFunctionInitInput &input, GlobalTableFunctionState *gstate_p) { - auto &bind_data = input.bind_data->Cast(); - auto &gstate = gstate_p->Cast(); - - auto result = make_uniq(); - result->is_parallel = true; - result->batch_index = 0; - if (input.CanRemoveFilterColumns()) { - result->all_columns.Initialize(context.client, gstate.scanned_types); - } - if (!ParquetParallelStateNext(context.client, bind_data, *result, gstate)) { - return nullptr; - } - return std::move(result); - } - - static unique_ptr ParquetScanInitGlobal(ClientContext &context, - TableFunctionInitInput &input) { - auto &bind_data = input.bind_data->CastNoConst(); - auto result = make_uniq(); - - result->file_states = vector(bind_data.files.size(), ParquetFileState::UNOPENED); - result->file_mutexes = unique_ptr(new mutex[bind_data.files.size()]); - if (bind_data.files.empty()) { - result->initial_reader = nullptr; - } else { - result->readers = std::move(bind_data.union_readers); - if (result->readers.size() != bind_data.files.size()) { - result->readers = vector>(bind_data.files.size(), nullptr); - } else { - std::fill(result->file_states.begin(), result->file_states.end(), ParquetFileState::OPEN); - } - if (bind_data.initial_reader) { - result->initial_reader = std::move(bind_data.initial_reader); - result->readers[0] = result->initial_reader; - } else if (result->readers[0]) { - result->initial_reader = result->readers[0]; - } else { - result->initial_reader = - make_shared(context, bind_data.files[0], bind_data.parquet_options); - result->readers[0] = result->initial_reader; - } - result->file_states[0] = ParquetFileState::OPEN; - } - for (auto &reader : result->readers) { - if (!reader) { - continue; - } - MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options, bind_data.reader_bind, - bind_data.types, bind_data.names, input.column_ids, input.filters, - bind_data.files[0], context); - } - - result->column_ids = input.column_ids; - result->filters = input.filters.get(); - result->row_group_index = 0; - result->file_index = 0; - result->batch_index = 0; - result->max_threads = ParquetScanMaxThreads(context, input.bind_data.get()); - if (input.CanRemoveFilterColumns()) { - result->projection_ids = input.projection_ids; - const auto table_types = bind_data.types; - for (const auto &col_idx : input.column_ids) { - if (IsRowIdColumnId(col_idx)) { - result->scanned_types.emplace_back(LogicalType::ROW_TYPE); - } else { - result->scanned_types.push_back(table_types[col_idx]); - } - } - } - return std::move(result); - } - - static idx_t ParquetScanGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p, - LocalTableFunctionState *local_state, - GlobalTableFunctionState *global_state) { - auto &data = local_state->Cast(); - return data.batch_index; - } - - static void ParquetScanSerialize(FieldWriter &writer, const FunctionData *bind_data_p, - const TableFunction &function) { - auto &bind_data = bind_data_p->Cast(); - writer.WriteList(bind_data.files); - writer.WriteRegularSerializableList(bind_data.types); - writer.WriteList(bind_data.names); - bind_data.parquet_options.Serialize(writer); - } - - static unique_ptr ParquetScanDeserialize(PlanDeserializationState &state, FieldReader &reader, - TableFunction &function) { - auto &context = state.context; - auto files = reader.ReadRequiredList(); - auto types = reader.ReadRequiredSerializableList(); - auto names = reader.ReadRequiredList(); - ParquetOptions options(context); - options.Deserialize(reader); - - return ParquetScanBindInternal(context, files, types, names, options); - } - - static void ParquetScanFormatSerialize(FormatSerializer &serializer, const optional_ptr bind_data_p, - const TableFunction &function) { - auto &bind_data = bind_data_p->Cast(); - serializer.WriteProperty(100, "files", bind_data.files); - serializer.WriteProperty(101, "types", bind_data.types); - serializer.WriteProperty(102, "names", bind_data.names); - serializer.WriteProperty(103, "parquet_options", bind_data.parquet_options); - } - - static unique_ptr ParquetScanFormatDeserialize(FormatDeserializer &deserializer, - TableFunction &function) { - auto &context = deserializer.Get(); - auto files = deserializer.ReadProperty>(100, "files"); - auto types = deserializer.ReadProperty>(101, "types"); - auto names = deserializer.ReadProperty>(102, "names"); - auto parquet_options = deserializer.ReadProperty(103, "parquet_options"); - return ParquetScanBindInternal(context, files, types, names, parquet_options); - } - - static void ParquetScanImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - if (!data_p.local_state) { - return; - } - auto &data = data_p.local_state->Cast(); - auto &gstate = data_p.global_state->Cast(); - auto &bind_data = data_p.bind_data->CastNoConst(); - - do { - if (gstate.CanRemoveFilterColumns()) { - data.all_columns.Reset(); - data.reader->Scan(data.scan_state, data.all_columns); - MultiFileReader::FinalizeChunk(bind_data.reader_bind, data.reader->reader_data, data.all_columns); - output.ReferenceColumns(data.all_columns, gstate.projection_ids); - } else { - data.reader->Scan(data.scan_state, output); - MultiFileReader::FinalizeChunk(bind_data.reader_bind, data.reader->reader_data, output); - } - - bind_data.chunk_count++; - if (output.size() > 0) { - return; - } - if (!ParquetParallelStateNext(context, bind_data, data, gstate)) { - return; - } - } while (true); - } - - static unique_ptr ParquetCardinality(ClientContext &context, const FunctionData *bind_data) { - auto &data = bind_data->Cast(); - return make_uniq(data.initial_file_cardinality * data.files.size()); - } - - static idx_t ParquetScanMaxThreads(ClientContext &context, const FunctionData *bind_data) { - auto &data = bind_data->Cast(); - return data.initial_file_row_groups * data.files.size(); - } - - // This function looks for the next available row group. If not available, it will open files from bind_data.files - // until there is a row group available for scanning or the files runs out - static bool ParquetParallelStateNext(ClientContext &context, const ParquetReadBindData &bind_data, - ParquetReadLocalState &scan_data, ParquetReadGlobalState ¶llel_state) { - unique_lock parallel_lock(parallel_state.lock); - - while (true) { - if (parallel_state.error_opening_file) { - return false; - } - - if (parallel_state.file_index >= parallel_state.readers.size()) { - return false; - } - - D_ASSERT(parallel_state.initial_reader); - - if (parallel_state.file_states[parallel_state.file_index] == ParquetFileState::OPEN) { - if (parallel_state.row_group_index < - parallel_state.readers[parallel_state.file_index]->NumRowGroups()) { - // The current reader has rowgroups left to be scanned - scan_data.reader = parallel_state.readers[parallel_state.file_index]; - vector group_indexes {parallel_state.row_group_index}; - scan_data.reader->InitializeScan(scan_data.scan_state, group_indexes); - scan_data.batch_index = parallel_state.batch_index++; - scan_data.file_index = parallel_state.file_index; - parallel_state.row_group_index++; - return true; - } else { - // Close current file - parallel_state.file_states[parallel_state.file_index] = ParquetFileState::CLOSED; - parallel_state.readers[parallel_state.file_index] = nullptr; - - // Set state to the next file - parallel_state.file_index++; - parallel_state.row_group_index = 0; - - if (parallel_state.file_index >= bind_data.files.size()) { - return false; - } - continue; - } - } - - if (TryOpenNextFile(context, bind_data, scan_data, parallel_state, parallel_lock)) { - continue; - } - - // Check if the current file is being opened, in that case we need to wait for it. - if (parallel_state.file_states[parallel_state.file_index] == ParquetFileState::OPENING) { - WaitForFile(parallel_state.file_index, parallel_state, parallel_lock); - } - } - } - - static void ParquetComplexFilterPushdown(ClientContext &context, LogicalGet &get, FunctionData *bind_data_p, - vector> &filters) { - auto &data = bind_data_p->Cast(); - - auto reset_reader = MultiFileReader::ComplexFilterPushdown(context, data.files, - data.parquet_options.file_options, get, filters); - if (reset_reader) { - MultiFileReader::PruneReaders(data); - } - } - - //! Wait for a file to become available. Parallel lock should be locked when calling. - static void WaitForFile(idx_t file_index, ParquetReadGlobalState ¶llel_state, - unique_lock ¶llel_lock) { - while (true) { - // To get the file lock, we first need to release the parallel_lock to prevent deadlocking - parallel_lock.unlock(); - unique_lock current_file_lock(parallel_state.file_mutexes[file_index]); - parallel_lock.lock(); - - // Here we have both locks which means we can stop waiting if: - // - the thread opening the file is done and the file is available - // - the thread opening the file has failed - // - the file was somehow scanned till the end while we were waiting - if (parallel_state.file_index >= parallel_state.readers.size() || - parallel_state.file_states[parallel_state.file_index] != ParquetFileState::OPENING || - parallel_state.error_opening_file) { - return; - } - } - } - - //! Helper function that try to start opening a next file. Parallel lock should be locked when calling. - static bool TryOpenNextFile(ClientContext &context, const ParquetReadBindData &bind_data, - ParquetReadLocalState &scan_data, ParquetReadGlobalState ¶llel_state, - unique_lock ¶llel_lock) { - const auto num_threads = TaskScheduler::GetScheduler(context).NumberOfThreads(); - const auto file_index_limit = MinValue(parallel_state.file_index + num_threads, bind_data.files.size()); - for (idx_t i = parallel_state.file_index; i < file_index_limit; i++) { - if (parallel_state.file_states[i] == ParquetFileState::UNOPENED) { - string file = bind_data.files[i]; - parallel_state.file_states[i] = ParquetFileState::OPENING; - auto pq_options = parallel_state.initial_reader->parquet_options; - - // Now we switch which lock we are holding, instead of locking the global state, we grab the lock on - // the file we are opening. This file lock allows threads to wait for a file to be opened. - parallel_lock.unlock(); - - unique_lock file_lock(parallel_state.file_mutexes[i]); - - shared_ptr reader; - try { - reader = make_shared(context, file, pq_options); - MultiFileReader::InitializeReader(*reader, bind_data.parquet_options.file_options, - bind_data.reader_bind, bind_data.types, bind_data.names, - parallel_state.column_ids, parallel_state.filters, - bind_data.files.front(), context); - } catch (...) { - parallel_lock.lock(); - parallel_state.error_opening_file = true; - throw; - } - - // Now re-lock the state and add the reader - parallel_lock.lock(); - parallel_state.readers[i] = reader; - parallel_state.file_states[i] = ParquetFileState::OPEN; - - return true; - } - } - - return false; - } -}; - -static case_insensitive_map_t GetChildNameToTypeMap(const LogicalType &type) { - case_insensitive_map_t name_to_type_map; - switch (type.id()) { - case LogicalTypeId::LIST: - name_to_type_map.emplace("element", ListType::GetChildType(type)); - break; - case LogicalTypeId::MAP: - name_to_type_map.emplace("key", MapType::KeyType(type)); - name_to_type_map.emplace("value", MapType::ValueType(type)); - break; - case LogicalTypeId::STRUCT: - for (auto &child_type : StructType::GetChildTypes(type)) { - if (child_type.first == FieldID::DUCKDB_FIELD_ID) { - throw BinderException("Cannot have column named \"%s\" with FIELD_IDS", FieldID::DUCKDB_FIELD_ID); - } - name_to_type_map.emplace(child_type); - } - break; - default: // LCOV_EXCL_START - throw InternalException("Unexpected type in GetChildNameToTypeMap"); - } // LCOV_EXCL_STOP - return name_to_type_map; -} - -static void GetChildNamesAndTypes(const LogicalType &type, vector &child_names, - vector &child_types) { - switch (type.id()) { - case LogicalTypeId::LIST: - child_names.emplace_back("element"); - child_types.emplace_back(ListType::GetChildType(type)); - break; - case LogicalTypeId::MAP: - child_names.emplace_back("key"); - child_names.emplace_back("value"); - child_types.emplace_back(MapType::KeyType(type)); - child_types.emplace_back(MapType::ValueType(type)); - break; - case LogicalTypeId::STRUCT: - for (auto &child_type : StructType::GetChildTypes(type)) { - child_names.emplace_back(child_type.first); - child_types.emplace_back(child_type.second); - } - break; - default: // LCOV_EXCL_START - throw InternalException("Unexpected type in GetChildNamesAndTypes"); - } // LCOV_EXCL_STOP -} - -static void GenerateFieldIDs(ChildFieldIDs &field_ids, idx_t &field_id, const vector &names, - const vector &sql_types) { - D_ASSERT(names.size() == sql_types.size()); - for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) { - const auto &col_name = names[col_idx]; - auto inserted = field_ids.ids->insert(make_pair(col_name, FieldID(field_id++))); - D_ASSERT(inserted.second); - - const auto &col_type = sql_types[col_idx]; - if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP && - col_type.id() != LogicalTypeId::STRUCT) { - continue; - } - - // Cannot use GetChildNameToTypeMap here because we lose order, and we want to generate depth-first - vector child_names; - vector child_types; - GetChildNamesAndTypes(col_type, child_names, child_types); - - GenerateFieldIDs(inserted.first->second.child_field_ids, field_id, child_names, child_types); - } -} - -static void GetFieldIDs(const Value &field_ids_value, ChildFieldIDs &field_ids, - unordered_set &unique_field_ids, - const case_insensitive_map_t &name_to_type_map) { - const auto &struct_type = field_ids_value.type(); - if (struct_type.id() != LogicalTypeId::STRUCT) { - throw BinderException( - "Expected FIELD_IDS to be a STRUCT, e.g., {col1: 42, col2: {%s: 43, nested_col: 44}, col3: 44}", - FieldID::DUCKDB_FIELD_ID); - } - const auto &struct_children = StructValue::GetChildren(field_ids_value); - D_ASSERT(StructType::GetChildTypes(struct_type).size() == struct_children.size()); - for (idx_t i = 0; i < struct_children.size(); i++) { - const auto &col_name = StringUtil::Lower(StructType::GetChildName(struct_type, i)); - if (col_name == FieldID::DUCKDB_FIELD_ID) { - continue; - } - - auto it = name_to_type_map.find(col_name); - if (it == name_to_type_map.end()) { - string names; - for (const auto &name : name_to_type_map) { - if (!names.empty()) { - names += ", "; - } - names += name.first; - } - throw BinderException("Column name \"%s\" specified in FIELD_IDS not found. Available column names: [%s]", - col_name, names); - } - D_ASSERT(field_ids.ids->find(col_name) == field_ids.ids->end()); // Caught by STRUCT - deduplicates keys - - const auto &child_value = struct_children[i]; - const auto &child_type = child_value.type(); - optional_ptr field_id_value; - optional_ptr child_field_ids_value; - - if (child_type.id() == LogicalTypeId::STRUCT) { - const auto &nested_children = StructValue::GetChildren(child_value); - D_ASSERT(StructType::GetChildTypes(child_type).size() == nested_children.size()); - for (idx_t nested_i = 0; nested_i < nested_children.size(); nested_i++) { - const auto &field_id_or_nested_col = StructType::GetChildName(child_type, nested_i); - if (field_id_or_nested_col == FieldID::DUCKDB_FIELD_ID) { - field_id_value = &nested_children[nested_i]; - } else { - child_field_ids_value = &child_value; - } - } - } else { - field_id_value = &child_value; - } - - FieldID field_id; - if (field_id_value) { - Value field_id_integer_value = field_id_value->DefaultCastAs(LogicalType::INTEGER); - const uint32_t field_id_int = IntegerValue::Get(field_id_integer_value); - if (!unique_field_ids.insert(field_id_int).second) { - throw BinderException("Duplicate field_id %s found in FIELD_IDS", field_id_integer_value.ToString()); - } - field_id = FieldID(field_id_int); - } - auto inserted = field_ids.ids->insert(make_pair(col_name, std::move(field_id))); - D_ASSERT(inserted.second); - - if (child_field_ids_value) { - const auto &col_type = it->second; - if (col_type.id() != LogicalTypeId::LIST && col_type.id() != LogicalTypeId::MAP && - col_type.id() != LogicalTypeId::STRUCT) { - throw BinderException("Column \"%s\" with type \"%s\" cannot have a nested FIELD_IDS specification", - col_name, LogicalTypeIdToString(col_type.id())); - } - - GetFieldIDs(*child_field_ids_value, inserted.first->second.child_field_ids, unique_field_ids, - GetChildNameToTypeMap(col_type)); - } - } -} - -unique_ptr ParquetWriteBind(ClientContext &context, CopyInfo &info, vector &names, - vector &sql_types) { - D_ASSERT(names.size() == sql_types.size()); - bool row_group_size_bytes_set = false; - auto bind_data = make_uniq(); - for (auto &option : info.options) { - const auto loption = StringUtil::Lower(option.first); - if (option.second.size() != 1) { - // All parquet write options require exactly one argument - throw BinderException("%s requires exactly one argument", StringUtil::Upper(loption)); - } - if (loption == "row_group_size" || loption == "chunk_size") { - bind_data->row_group_size = option.second[0].GetValue(); - } else if (loption == "row_group_size_bytes") { - auto roption = option.second[0]; - if (roption.GetTypeMutable().id() == LogicalTypeId::VARCHAR) { - bind_data->row_group_size_bytes = DBConfig::ParseMemoryLimit(roption.ToString()); - } else { - bind_data->row_group_size_bytes = option.second[0].GetValue(); - } - row_group_size_bytes_set = true; - } else if (loption == "compression" || loption == "codec") { - const auto roption = StringUtil::Lower(option.second[0].ToString()); - if (roption == "uncompressed") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::UNCOMPRESSED; - } else if (roption == "snappy") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::SNAPPY; - } else if (roption == "gzip") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::GZIP; - } else if (roption == "zstd") { - bind_data->codec = duckdb_parquet::format::CompressionCodec::ZSTD; - } else { - throw BinderException("Expected %s argument to be either [uncompressed, snappy, gzip or zstd]", - loption); - } - } else if (loption == "field_ids") { - if (option.second[0].type().id() == LogicalTypeId::VARCHAR && - StringUtil::Lower(StringValue::Get(option.second[0])) == "auto") { - idx_t field_id = 0; - GenerateFieldIDs(bind_data->field_ids, field_id, names, sql_types); - } else { - unordered_set unique_field_ids; - case_insensitive_map_t name_to_type_map; - for (idx_t col_idx = 0; col_idx < names.size(); col_idx++) { - if (names[col_idx] == FieldID::DUCKDB_FIELD_ID) { - throw BinderException("Cannot have a column named \"%s\" when writing FIELD_IDS", - FieldID::DUCKDB_FIELD_ID); - } - name_to_type_map.emplace(names[col_idx], sql_types[col_idx]); - } - GetFieldIDs(option.second[0], bind_data->field_ids, unique_field_ids, name_to_type_map); - } - } else { - throw NotImplementedException("Unrecognized option for PARQUET: %s", option.first.c_str()); - } - } - if (!row_group_size_bytes_set) { - bind_data->row_group_size_bytes = bind_data->row_group_size * ParquetWriteBindData::BYTES_PER_ROW; - } - - bind_data->sql_types = sql_types; - bind_data->column_names = names; - return std::move(bind_data); -} - -unique_ptr ParquetWriteInitializeGlobal(ClientContext &context, FunctionData &bind_data, - const string &file_path) { - auto global_state = make_uniq(); - auto &parquet_bind = bind_data.Cast(); - - auto &fs = FileSystem::GetFileSystem(context); - global_state->writer = make_uniq(fs, file_path, parquet_bind.sql_types, parquet_bind.column_names, - parquet_bind.codec, parquet_bind.field_ids.Copy()); - return std::move(global_state); -} - -void ParquetWriteSink(ExecutionContext &context, FunctionData &bind_data_p, GlobalFunctionData &gstate, - LocalFunctionData &lstate, DataChunk &input) { - auto &bind_data = bind_data_p.Cast(); - auto &global_state = gstate.Cast(); - auto &local_state = lstate.Cast(); - - // append data to the local (buffered) chunk collection - local_state.buffer.Append(local_state.append_state, input); - - if (local_state.buffer.Count() > bind_data.row_group_size || - local_state.buffer.SizeInBytes() > bind_data.row_group_size_bytes) { - // if the chunk collection exceeds a certain size (rows/bytes) we flush it to the parquet file - local_state.append_state.current_chunk_state.handles.clear(); - global_state.writer->Flush(local_state.buffer); - local_state.buffer.InitializeAppend(local_state.append_state); - } -} - -void ParquetWriteCombine(ExecutionContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - LocalFunctionData &lstate) { - auto &global_state = gstate.Cast(); - auto &local_state = lstate.Cast(); - // flush any data left in the local state to the file - global_state.writer->Flush(local_state.buffer); -} - -void ParquetWriteFinalize(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate) { - auto &global_state = gstate.Cast(); - // finalize: write any additional metadata to the file here - global_state.writer->Finalize(); -} - -unique_ptr ParquetWriteInitializeLocal(ExecutionContext &context, FunctionData &bind_data_p) { - auto &bind_data = bind_data_p.Cast(); - return make_uniq(context.client, bind_data.sql_types); -} - -// LCOV_EXCL_START -static void ParquetCopySerialize(FieldWriter &writer, const FunctionData &bind_data_p, const CopyFunction &function) { - auto &bind_data = bind_data_p.Cast(); - writer.WriteRegularSerializableList(bind_data.sql_types); - writer.WriteList(bind_data.column_names); - writer.WriteField(bind_data.codec); - writer.WriteField(bind_data.row_group_size); -} - -static unique_ptr ParquetCopyDeserialize(ClientContext &context, FieldReader &reader, - CopyFunction &function) { - unique_ptr data = make_uniq(); - - data->sql_types = reader.ReadRequiredSerializableList(); - data->column_names = reader.ReadRequiredList(); - data->codec = reader.ReadRequired(); - data->row_group_size = reader.ReadRequired(); - - return std::move(data); -} -// LCOV_EXCL_STOP - -//===--------------------------------------------------------------------===// -// Execution Mode -//===--------------------------------------------------------------------===// -CopyFunctionExecutionMode ParquetWriteExecutionMode(bool preserve_insertion_order, bool supports_batch_index) { - if (!preserve_insertion_order) { - return CopyFunctionExecutionMode::PARALLEL_COPY_TO_FILE; - } - if (supports_batch_index) { - return CopyFunctionExecutionMode::BATCH_COPY_TO_FILE; - } - return CopyFunctionExecutionMode::REGULAR_COPY_TO_FILE; -} -//===--------------------------------------------------------------------===// -// Prepare Batch -//===--------------------------------------------------------------------===// -struct ParquetWriteBatchData : public PreparedBatchData { - PreparedRowGroup prepared_row_group; -}; - -unique_ptr ParquetWritePrepareBatch(ClientContext &context, FunctionData &bind_data, - GlobalFunctionData &gstate, - unique_ptr collection) { - auto &global_state = gstate.Cast(); - auto result = make_uniq(); - global_state.writer->PrepareRowGroup(*collection, result->prepared_row_group); - return std::move(result); -} - -//===--------------------------------------------------------------------===// -// Flush Batch -//===--------------------------------------------------------------------===// -void ParquetWriteFlushBatch(ClientContext &context, FunctionData &bind_data, GlobalFunctionData &gstate, - PreparedBatchData &batch_p) { - auto &global_state = gstate.Cast(); - auto &batch = batch_p.Cast(); - global_state.writer->FlushRowGroup(batch.prepared_row_group); -} - -//===--------------------------------------------------------------------===// -// Desired Batch Size -//===--------------------------------------------------------------------===// -idx_t ParquetWriteDesiredBatchSize(ClientContext &context, FunctionData &bind_data_p) { - auto &bind_data = bind_data_p.Cast(); - return bind_data.row_group_size; -} - -//===--------------------------------------------------------------------===// -// Scan Replacement -//===--------------------------------------------------------------------===// -unique_ptr ParquetScanReplacement(ClientContext &context, const string &table_name, - ReplacementScanData *data) { - auto lower_name = StringUtil::Lower(table_name); - if (!StringUtil::EndsWith(lower_name, ".parquet") && !StringUtil::Contains(lower_name, ".parquet?")) { - return nullptr; - } - auto table_function = make_uniq(); - vector> children; - children.push_back(make_uniq(Value(table_name))); - table_function->function = make_uniq("parquet_scan", std::move(children)); - - if (!FileSystem::HasGlob(table_name)) { - auto &fs = FileSystem::GetFileSystem(context); - table_function->alias = fs.ExtractBaseName(table_name); - } - - return std::move(table_function); -} - -void ParquetExtension::Load(DuckDB &db) { - auto &db_instance = *db.instance; - auto &fs = db.GetFileSystem(); - fs.RegisterSubSystem(FileCompressionType::ZSTD, make_uniq()); - - auto scan_fun = ParquetScanFunction::GetFunctionSet(); - scan_fun.name = "read_parquet"; - ExtensionUtil::RegisterFunction(db_instance, scan_fun); - scan_fun.name = "parquet_scan"; - ExtensionUtil::RegisterFunction(db_instance, scan_fun); - - // parquet_metadata - ParquetMetaDataFunction meta_fun; - ExtensionUtil::RegisterFunction(db_instance, MultiFileReader::CreateFunctionSet(meta_fun)); - - // parquet_schema - ParquetSchemaFunction schema_fun; - ExtensionUtil::RegisterFunction(db_instance, MultiFileReader::CreateFunctionSet(schema_fun)); - - CopyFunction function("parquet"); - function.copy_to_bind = ParquetWriteBind; - function.copy_to_initialize_global = ParquetWriteInitializeGlobal; - function.copy_to_initialize_local = ParquetWriteInitializeLocal; - function.copy_to_sink = ParquetWriteSink; - function.copy_to_combine = ParquetWriteCombine; - function.copy_to_finalize = ParquetWriteFinalize; - function.execution_mode = ParquetWriteExecutionMode; - function.copy_from_bind = ParquetScanFunction::ParquetReadBind; - function.copy_from_function = scan_fun.functions[0]; - function.prepare_batch = ParquetWritePrepareBatch; - function.flush_batch = ParquetWriteFlushBatch; - function.desired_batch_size = ParquetWriteDesiredBatchSize; - function.serialize = ParquetCopySerialize; - function.deserialize = ParquetCopyDeserialize; - function.supports_type = ParquetWriter::TypeIsSupported; - - function.extension = "parquet"; - ExtensionUtil::RegisterFunction(db_instance, function); - - auto &config = DBConfig::GetConfig(*db.instance); - config.replacement_scans.emplace_back(ParquetScanReplacement); - config.AddExtensionOption("binary_as_string", "In Parquet files, interpret binary data as a string.", - LogicalType::BOOLEAN); -} - -std::string ParquetExtension::Name() { - return "parquet"; -} - -} // namespace duckdb - -#ifdef DUCKDB_BUILD_LOADABLE_EXTENSION -extern "C" { - -DUCKDB_EXTENSION_API void parquet_init(duckdb::DatabaseInstance &db) { // NOLINT - duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); -} - -DUCKDB_EXTENSION_API const char *parquet_version() { // NOLINT - return duckdb::DuckDB::LibraryVersion(); -} -} -#endif - -#ifndef DUCKDB_EXTENSION_MAIN -#error DUCKDB_EXTENSION_MAIN not defined -#endif diff --git a/src/duckdb/extension/parquet/parquet_metadata.cpp b/src/duckdb/extension/parquet/parquet_metadata.cpp deleted file mode 100644 index f37194291..000000000 --- a/src/duckdb/extension/parquet/parquet_metadata.cpp +++ /dev/null @@ -1,499 +0,0 @@ -#include "parquet_metadata.hpp" - -#include "parquet_statistics.hpp" - -#include - -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/multi_file_reader.hpp" -#include "duckdb/common/types/blob.hpp" -#include "duckdb/common/types/column/column_data_collection.hpp" -#include "duckdb/main/config.hpp" -#endif - -namespace duckdb { - -struct ParquetMetaDataBindData : public TableFunctionData { - vector return_types; - vector files; - -public: - bool Equals(const FunctionData &other_p) const override { - auto &other = other_p.Cast(); - return other.return_types == return_types && files == other.files; - } -}; - -struct ParquetMetaDataOperatorData : public GlobalTableFunctionState { - explicit ParquetMetaDataOperatorData(ClientContext &context, const vector &types) - : collection(context, types) { - } - - idx_t file_index; - ColumnDataCollection collection; - ColumnDataScanState scan_state; - -public: - static void BindMetaData(vector &return_types, vector &names); - static void BindSchema(vector &return_types, vector &names); - - void LoadFileMetaData(ClientContext &context, const vector &return_types, const string &file_path); - void LoadSchemaData(ClientContext &context, const vector &return_types, const string &file_path); -}; - -template -string ConvertParquetElementToString(T &&entry) { - std::stringstream ss; - ss << entry; - return ss.str(); -} - -template -string PrintParquetElementToString(T &&entry) { - std::stringstream ss; - entry.printTo(ss); - return ss.str(); -} - -template -Value ParquetElementString(T &&value, bool is_set) { - if (!is_set) { - return Value(); - } - return Value(ConvertParquetElementToString(value)); -} - -template -Value ParquetElementInteger(T &&value, bool is_iset) { - if (!is_iset) { - return Value(); - } - return Value::INTEGER(value); -} - -template -Value ParquetElementBigint(T &&value, bool is_iset) { - if (!is_iset) { - return Value(); - } - return Value::BIGINT(value); -} - -void ParquetMetaDataOperatorData::BindMetaData(vector &return_types, vector &names) { - names.emplace_back("file_name"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("row_group_id"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("row_group_num_rows"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("row_group_num_columns"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("row_group_bytes"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("column_id"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("file_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("num_values"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("path_in_schema"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_min"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_max"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_null_count"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("stats_distinct_count"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("stats_min_value"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("stats_max_value"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("compression"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("encodings"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("index_page_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("dictionary_page_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("data_page_offset"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("total_compressed_size"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("total_uncompressed_size"); - return_types.emplace_back(LogicalType::BIGINT); -} - -Value ConvertParquetStats(const LogicalType &type, const duckdb_parquet::format::SchemaElement &schema_ele, - bool stats_is_set, const std::string &stats) { - if (!stats_is_set) { - return Value(LogicalType::VARCHAR); - } - return ParquetStatisticsUtils::ConvertValue(type, schema_ele, stats).DefaultCastAs(LogicalType::VARCHAR); -} - -void ParquetMetaDataOperatorData::LoadFileMetaData(ClientContext &context, const vector &return_types, - const string &file_path) { - collection.Reset(); - ParquetOptions parquet_options(context); - auto reader = make_uniq(context, file_path, parquet_options); - idx_t count = 0; - DataChunk current_chunk; - current_chunk.Initialize(context, return_types); - auto meta_data = reader->GetFileMetadata(); - vector column_types; - vector schema_indexes; - for (idx_t schema_idx = 0; schema_idx < meta_data->schema.size(); schema_idx++) { - auto &schema_element = meta_data->schema[schema_idx]; - if (schema_element.num_children > 0) { - continue; - } - column_types.push_back(ParquetReader::DeriveLogicalType(schema_element, false)); - schema_indexes.push_back(schema_idx); - } - - for (idx_t row_group_idx = 0; row_group_idx < meta_data->row_groups.size(); row_group_idx++) { - auto &row_group = meta_data->row_groups[row_group_idx]; - - if (row_group.columns.size() > column_types.size()) { - throw InternalException("Too many column in row group: corrupt file?"); - } - for (idx_t col_idx = 0; col_idx < row_group.columns.size(); col_idx++) { - auto &column = row_group.columns[col_idx]; - auto &col_meta = column.meta_data; - auto &stats = col_meta.statistics; - auto &schema_element = meta_data->schema[schema_indexes[col_idx]]; - auto &column_type = column_types[col_idx]; - - // file_name, LogicalType::VARCHAR - current_chunk.SetValue(0, count, file_path); - - // row_group_id, LogicalType::BIGINT - current_chunk.SetValue(1, count, Value::BIGINT(row_group_idx)); - - // row_group_num_rows, LogicalType::BIGINT - current_chunk.SetValue(2, count, Value::BIGINT(row_group.num_rows)); - - // row_group_num_columns, LogicalType::BIGINT - current_chunk.SetValue(3, count, Value::BIGINT(row_group.columns.size())); - - // row_group_bytes, LogicalType::BIGINT - current_chunk.SetValue(4, count, Value::BIGINT(row_group.total_byte_size)); - - // column_id, LogicalType::BIGINT - current_chunk.SetValue(5, count, Value::BIGINT(col_idx)); - - // file_offset, LogicalType::BIGINT - current_chunk.SetValue(6, count, ParquetElementBigint(column.file_offset, row_group.__isset.file_offset)); - - // num_values, LogicalType::BIGINT - current_chunk.SetValue(7, count, Value::BIGINT(col_meta.num_values)); - - // path_in_schema, LogicalType::VARCHAR - current_chunk.SetValue(8, count, StringUtil::Join(col_meta.path_in_schema, ", ")); - - // type, LogicalType::VARCHAR - current_chunk.SetValue(9, count, ConvertParquetElementToString(col_meta.type)); - - // stats_min, LogicalType::VARCHAR - current_chunk.SetValue(10, count, - ConvertParquetStats(column_type, schema_element, stats.__isset.min, stats.min)); - - // stats_max, LogicalType::VARCHAR - current_chunk.SetValue(11, count, - ConvertParquetStats(column_type, schema_element, stats.__isset.max, stats.max)); - - // stats_null_count, LogicalType::BIGINT - current_chunk.SetValue(12, count, ParquetElementBigint(stats.null_count, stats.__isset.null_count)); - - // stats_distinct_count, LogicalType::BIGINT - current_chunk.SetValue(13, count, ParquetElementBigint(stats.distinct_count, stats.__isset.distinct_count)); - - // stats_min_value, LogicalType::VARCHAR - current_chunk.SetValue( - 14, count, ConvertParquetStats(column_type, schema_element, stats.__isset.min_value, stats.min_value)); - - // stats_max_value, LogicalType::VARCHAR - current_chunk.SetValue( - 15, count, ConvertParquetStats(column_type, schema_element, stats.__isset.max_value, stats.max_value)); - - // compression, LogicalType::VARCHAR - current_chunk.SetValue(16, count, ConvertParquetElementToString(col_meta.codec)); - - // encodings, LogicalType::VARCHAR - vector encoding_string; - encoding_string.reserve(col_meta.encodings.size()); - for (auto &encoding : col_meta.encodings) { - encoding_string.push_back(ConvertParquetElementToString(encoding)); - } - current_chunk.SetValue(17, count, Value(StringUtil::Join(encoding_string, ", "))); - - // index_page_offset, LogicalType::BIGINT - current_chunk.SetValue( - 18, count, ParquetElementBigint(col_meta.index_page_offset, col_meta.__isset.index_page_offset)); - - // dictionary_page_offset, LogicalType::BIGINT - current_chunk.SetValue( - 19, count, - ParquetElementBigint(col_meta.dictionary_page_offset, col_meta.__isset.dictionary_page_offset)); - - // data_page_offset, LogicalType::BIGINT - current_chunk.SetValue(20, count, Value::BIGINT(col_meta.data_page_offset)); - - // total_compressed_size, LogicalType::BIGINT - current_chunk.SetValue(21, count, Value::BIGINT(col_meta.total_compressed_size)); - - // total_uncompressed_size, LogicalType::BIGINT - current_chunk.SetValue(22, count, Value::BIGINT(col_meta.total_uncompressed_size)); - - count++; - if (count >= STANDARD_VECTOR_SIZE) { - current_chunk.SetCardinality(count); - collection.Append(current_chunk); - - count = 0; - current_chunk.Reset(); - } - } - } - current_chunk.SetCardinality(count); - collection.Append(current_chunk); - - collection.InitializeScan(scan_state); -} - -void ParquetMetaDataOperatorData::BindSchema(vector &return_types, vector &names) { - names.emplace_back("file_name"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("name"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("type_length"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("repetition_type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("num_children"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("converted_type"); - return_types.emplace_back(LogicalType::VARCHAR); - - names.emplace_back("scale"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("precision"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("field_id"); - return_types.emplace_back(LogicalType::BIGINT); - - names.emplace_back("logical_type"); - return_types.emplace_back(LogicalType::VARCHAR); -} - -Value ParquetLogicalTypeToString(const duckdb_parquet::format::LogicalType &type, bool is_set) { - if (!is_set) { - return Value(); - } - if (type.__isset.STRING) { - return Value(PrintParquetElementToString(type.STRING)); - } - if (type.__isset.MAP) { - return Value(PrintParquetElementToString(type.MAP)); - } - if (type.__isset.LIST) { - return Value(PrintParquetElementToString(type.LIST)); - } - if (type.__isset.ENUM) { - return Value(PrintParquetElementToString(type.ENUM)); - } - if (type.__isset.DECIMAL) { - return Value(PrintParquetElementToString(type.DECIMAL)); - } - if (type.__isset.DATE) { - return Value(PrintParquetElementToString(type.DATE)); - } - if (type.__isset.TIME) { - return Value(PrintParquetElementToString(type.TIME)); - } - if (type.__isset.TIMESTAMP) { - return Value(PrintParquetElementToString(type.TIMESTAMP)); - } - if (type.__isset.INTEGER) { - return Value(PrintParquetElementToString(type.INTEGER)); - } - if (type.__isset.UNKNOWN) { - return Value(PrintParquetElementToString(type.UNKNOWN)); - } - if (type.__isset.JSON) { - return Value(PrintParquetElementToString(type.JSON)); - } - if (type.__isset.BSON) { - return Value(PrintParquetElementToString(type.BSON)); - } - if (type.__isset.UUID) { - return Value(PrintParquetElementToString(type.UUID)); - } - return Value(); -} - -void ParquetMetaDataOperatorData::LoadSchemaData(ClientContext &context, const vector &return_types, - const string &file_path) { - collection.Reset(); - ParquetOptions parquet_options(context); - auto reader = make_uniq(context, file_path, parquet_options); - idx_t count = 0; - DataChunk current_chunk; - current_chunk.Initialize(context, return_types); - auto meta_data = reader->GetFileMetadata(); - for (idx_t col_idx = 0; col_idx < meta_data->schema.size(); col_idx++) { - auto &column = meta_data->schema[col_idx]; - - // file_name, LogicalType::VARCHAR - current_chunk.SetValue(0, count, file_path); - - // name, LogicalType::VARCHAR - current_chunk.SetValue(1, count, column.name); - - // type, LogicalType::VARCHAR - current_chunk.SetValue(2, count, ParquetElementString(column.type, column.__isset.type)); - - // type_length, LogicalType::INTEGER - current_chunk.SetValue(3, count, ParquetElementInteger(column.type_length, column.__isset.type_length)); - - // repetition_type, LogicalType::VARCHAR - current_chunk.SetValue(4, count, ParquetElementString(column.repetition_type, column.__isset.repetition_type)); - - // num_children, LogicalType::BIGINT - current_chunk.SetValue(5, count, ParquetElementBigint(column.num_children, column.__isset.num_children)); - - // converted_type, LogicalType::VARCHAR - current_chunk.SetValue(6, count, ParquetElementString(column.converted_type, column.__isset.converted_type)); - - // scale, LogicalType::BIGINT - current_chunk.SetValue(7, count, ParquetElementBigint(column.scale, column.__isset.scale)); - - // precision, LogicalType::BIGINT - current_chunk.SetValue(8, count, ParquetElementBigint(column.precision, column.__isset.precision)); - - // field_id, LogicalType::BIGINT - current_chunk.SetValue(9, count, ParquetElementBigint(column.field_id, column.__isset.field_id)); - - // logical_type, LogicalType::VARCHAR - current_chunk.SetValue(10, count, ParquetLogicalTypeToString(column.logicalType, column.__isset.logicalType)); - - count++; - if (count >= STANDARD_VECTOR_SIZE) { - current_chunk.SetCardinality(count); - collection.Append(current_chunk); - - count = 0; - current_chunk.Reset(); - } - } - current_chunk.SetCardinality(count); - collection.Append(current_chunk); - - collection.InitializeScan(scan_state); -} - -template -unique_ptr ParquetMetaDataBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - if (SCHEMA) { - ParquetMetaDataOperatorData::BindSchema(return_types, names); - } else { - ParquetMetaDataOperatorData::BindMetaData(return_types, names); - } - - auto result = make_uniq(); - result->return_types = return_types; - result->files = MultiFileReader::GetFileList(context, input.inputs[0], "Parquet"); - return std::move(result); -} - -template -unique_ptr ParquetMetaDataInit(ClientContext &context, TableFunctionInitInput &input) { - auto &bind_data = input.bind_data->Cast(); - D_ASSERT(!bind_data.files.empty()); - - auto result = make_uniq(context, bind_data.return_types); - if (SCHEMA) { - result->LoadSchemaData(context, bind_data.return_types, bind_data.files[0]); - } else { - result->LoadFileMetaData(context, bind_data.return_types, bind_data.files[0]); - } - result->file_index = 0; - return std::move(result); -} - -template -void ParquetMetaDataImplementation(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &data = data_p.global_state->Cast(); - auto &bind_data = data_p.bind_data->Cast(); - - while (true) { - if (!data.collection.Scan(data.scan_state, output)) { - if (data.file_index + 1 < bind_data.files.size()) { - // load the metadata for the next file - data.file_index++; - if (SCHEMA) { - data.LoadSchemaData(context, bind_data.return_types, bind_data.files[data.file_index]); - } else { - data.LoadFileMetaData(context, bind_data.return_types, bind_data.files[data.file_index]); - } - continue; - } else { - // no files remaining: done - return; - } - } - if (output.size() != 0) { - return; - } - } -} - -ParquetMetaDataFunction::ParquetMetaDataFunction() - : TableFunction("parquet_metadata", {LogicalType::VARCHAR}, ParquetMetaDataImplementation, - ParquetMetaDataBind, ParquetMetaDataInit) { -} - -ParquetSchemaFunction::ParquetSchemaFunction() - : TableFunction("parquet_schema", {LogicalType::VARCHAR}, ParquetMetaDataImplementation, - ParquetMetaDataBind, ParquetMetaDataInit) { -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp deleted file mode 100644 index f6f5723ae..000000000 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ /dev/null @@ -1,998 +0,0 @@ -#include "parquet_reader.hpp" -#include "parquet_timestamp.hpp" -#include "parquet_statistics.hpp" -#include "column_reader.hpp" - -#include "boolean_column_reader.hpp" -#include "row_number_column_reader.hpp" -#include "cast_column_reader.hpp" -#include "callback_column_reader.hpp" -#include "list_column_reader.hpp" -#include "string_column_reader.hpp" -#include "struct_column_reader.hpp" -#include "templated_column_reader.hpp" - -#include "thrift_tools.hpp" - -#include "parquet_file_metadata_cache.hpp" - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/planner/table_filter.hpp" -#include "duckdb/planner/filter/constant_filter.hpp" -#include "duckdb/planner/filter/null_filter.hpp" -#include "duckdb/planner/filter/conjunction_filter.hpp" -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/pair.hpp" -#include "duckdb/common/hive_partitioning.hpp" -#include "duckdb/common/vector_operations/vector_operations.hpp" - -#include "duckdb/storage/object_cache.hpp" -#endif - -#include -#include -#include -#include - -namespace duckdb { - -using duckdb_parquet::format::ColumnChunk; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using ParquetRowGroup = duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::SchemaElement; -using duckdb_parquet::format::Statistics; -using duckdb_parquet::format::Type; - -static unique_ptr -CreateThriftProtocol(Allocator &allocator, FileHandle &file_handle, bool prefetch_mode) { - auto transport = make_shared(allocator, file_handle, prefetch_mode); - return make_uniq>(std::move(transport)); -} - -static shared_ptr LoadMetadata(Allocator &allocator, FileHandle &file_handle) { - auto current_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - - auto proto = CreateThriftProtocol(allocator, file_handle, false); - auto &transport = reinterpret_cast(*proto->getTransport()); - auto file_size = transport.GetSize(); - if (file_size < 12) { - throw InvalidInputException("File '%s' too small to be a Parquet file", file_handle.path); - } - - ResizeableBuffer buf; - buf.resize(allocator, 8); - buf.zero(); - - transport.SetLocation(file_size - 8); - transport.read((uint8_t *)buf.ptr, 8); - - if (memcmp(buf.ptr + 4, "PAR1", 4) != 0) { - if (memcmp(buf.ptr + 4, "PARE", 4) == 0) { - throw InvalidInputException("Encrypted Parquet files are not supported for file '%s'", file_handle.path); - } - throw InvalidInputException("No magic bytes found at end of file '%s'", file_handle.path); - } - // read four-byte footer length from just before the end magic bytes - auto footer_len = *reinterpret_cast(buf.ptr); - if (footer_len == 0 || file_size < 12 + footer_len) { - throw InvalidInputException("Footer length error in file '%s'", file_handle.path); - } - auto metadata_pos = file_size - (footer_len + 8); - transport.SetLocation(metadata_pos); - transport.Prefetch(metadata_pos, footer_len); - - auto metadata = make_uniq(); - metadata->read(proto.get()); - return make_shared(std::move(metadata), current_time); -} - -LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele, bool binary_as_string) { - // inner node - if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY && !s_ele.__isset.type_length) { - throw IOException("FIXED_LEN_BYTE_ARRAY requires length to be set"); - } - if (s_ele.__isset.logicalType) { - if (s_ele.logicalType.__isset.UUID) { - if (s_ele.type == Type::FIXED_LEN_BYTE_ARRAY) { - return LogicalType::UUID; - } - } else if (s_ele.logicalType.__isset.TIMESTAMP) { - if (s_ele.logicalType.TIMESTAMP.isAdjustedToUTC) { - return LogicalType::TIMESTAMP_TZ; - } - return LogicalType::TIMESTAMP; - } else if (s_ele.logicalType.__isset.TIME) { - if (s_ele.logicalType.TIME.isAdjustedToUTC) { - return LogicalType::TIME_TZ; - } - return LogicalType::TIME; - } - } - if (s_ele.__isset.converted_type) { - switch (s_ele.converted_type) { - case ConvertedType::INT_8: - if (s_ele.type == Type::INT32) { - return LogicalType::TINYINT; - } else { - throw IOException("INT8 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::INT_16: - if (s_ele.type == Type::INT32) { - return LogicalType::SMALLINT; - } else { - throw IOException("INT16 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::INT_32: - if (s_ele.type == Type::INT32) { - return LogicalType::INTEGER; - } else { - throw IOException("INT32 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::INT_64: - if (s_ele.type == Type::INT64) { - return LogicalType::BIGINT; - } else { - throw IOException("INT64 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_8: - if (s_ele.type == Type::INT32) { - return LogicalType::UTINYINT; - } else { - throw IOException("UINT8 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_16: - if (s_ele.type == Type::INT32) { - return LogicalType::USMALLINT; - } else { - throw IOException("UINT16 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_32: - if (s_ele.type == Type::INT32) { - return LogicalType::UINTEGER; - } else { - throw IOException("UINT32 converted type can only be set for value of Type::INT32"); - } - case ConvertedType::UINT_64: - if (s_ele.type == Type::INT64) { - return LogicalType::UBIGINT; - } else { - throw IOException("UINT64 converted type can only be set for value of Type::INT64"); - } - case ConvertedType::DATE: - if (s_ele.type == Type::INT32) { - return LogicalType::DATE; - } else { - throw IOException("DATE converted type can only be set for value of Type::INT32"); - } - case ConvertedType::TIMESTAMP_MICROS: - case ConvertedType::TIMESTAMP_MILLIS: - if (s_ele.type == Type::INT64) { - return LogicalType::TIMESTAMP; - } else { - throw IOException("TIMESTAMP converted type can only be set for value of Type::INT64"); - } - case ConvertedType::DECIMAL: - if (!s_ele.__isset.precision || !s_ele.__isset.scale) { - throw IOException("DECIMAL requires a length and scale specifier!"); - } - switch (s_ele.type) { - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - case Type::INT32: - case Type::INT64: - return LogicalType::DECIMAL(s_ele.precision, s_ele.scale); - default: - throw IOException( - "DECIMAL converted type can only be set for value of Type::(FIXED_LEN_)BYTE_ARRAY/INT32/INT64"); - } - case ConvertedType::UTF8: - case ConvertedType::ENUM: - switch (s_ele.type) { - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - return LogicalType::VARCHAR; - default: - throw IOException("UTF8 converted type can only be set for Type::(FIXED_LEN_)BYTE_ARRAY"); - } - case ConvertedType::TIME_MILLIS: - if (s_ele.type == Type::INT32) { - return LogicalType::TIME; - } else { - throw IOException("TIME_MILLIS converted type can only be set for value of Type::INT32"); - } - case ConvertedType::TIME_MICROS: - if (s_ele.type == Type::INT64) { - return LogicalType::TIME; - } else { - throw IOException("TIME_MICROS converted type can only be set for value of Type::INT64"); - } - case ConvertedType::INTERVAL: - return LogicalType::INTERVAL; - case ConvertedType::JSON: - return LogicalType::VARCHAR; - case ConvertedType::MAP: - case ConvertedType::MAP_KEY_VALUE: - case ConvertedType::LIST: - case ConvertedType::BSON: - default: - throw IOException("Unsupported converted type"); - } - } else { - // no converted type set - // use default type for each physical type - switch (s_ele.type) { - case Type::BOOLEAN: - return LogicalType::BOOLEAN; - case Type::INT32: - return LogicalType::INTEGER; - case Type::INT64: - return LogicalType::BIGINT; - case Type::INT96: // always a timestamp it would seem - return LogicalType::TIMESTAMP; - case Type::FLOAT: - return LogicalType::FLOAT; - case Type::DOUBLE: - return LogicalType::DOUBLE; - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - if (binary_as_string) { - return LogicalType::VARCHAR; - } - return LogicalType::BLOB; - default: - return LogicalType::INVALID; - } - } -} - -LogicalType ParquetReader::DeriveLogicalType(const SchemaElement &s_ele) { - return DeriveLogicalType(s_ele, parquet_options.binary_as_string); -} - -unique_ptr ParquetReader::CreateReaderRecursive(idx_t depth, idx_t max_define, idx_t max_repeat, - idx_t &next_schema_idx, idx_t &next_file_idx) { - auto file_meta_data = GetFileMetadata(); - D_ASSERT(file_meta_data); - D_ASSERT(next_schema_idx < file_meta_data->schema.size()); - auto &s_ele = file_meta_data->schema[next_schema_idx]; - auto this_idx = next_schema_idx; - - auto repetition_type = FieldRepetitionType::REQUIRED; - if (s_ele.__isset.repetition_type && this_idx > 0) { - repetition_type = s_ele.repetition_type; - } - if (repetition_type != FieldRepetitionType::REQUIRED) { - max_define++; - } - if (repetition_type == FieldRepetitionType::REPEATED) { - max_repeat++; - } - if (s_ele.__isset.num_children && s_ele.num_children > 0) { // inner node - child_list_t child_types; - vector> child_readers; - - idx_t c_idx = 0; - while (c_idx < (idx_t)s_ele.num_children) { - next_schema_idx++; - - auto &child_ele = file_meta_data->schema[next_schema_idx]; - - auto child_reader = - CreateReaderRecursive(depth + 1, max_define, max_repeat, next_schema_idx, next_file_idx); - child_types.push_back(make_pair(child_ele.name, child_reader->Type())); - child_readers.push_back(std::move(child_reader)); - - c_idx++; - } - D_ASSERT(!child_types.empty()); - unique_ptr result; - LogicalType result_type; - - bool is_repeated = repetition_type == FieldRepetitionType::REPEATED; - bool is_list = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::LIST; - bool is_map = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP; - bool is_map_kv = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP_KEY_VALUE; - if (!is_map_kv && this_idx > 0) { - // check if the parent node of this is a map - auto &p_ele = file_meta_data->schema[this_idx - 1]; - bool parent_is_map = p_ele.__isset.converted_type && p_ele.converted_type == ConvertedType::MAP; - bool parent_has_children = p_ele.__isset.num_children && p_ele.num_children == 1; - is_map_kv = parent_is_map && parent_has_children; - } - - if (is_map_kv) { - if (child_types.size() != 2) { - throw IOException("MAP_KEY_VALUE requires two children"); - } - if (!is_repeated) { - throw IOException("MAP_KEY_VALUE needs to be repeated"); - } - result_type = LogicalType::MAP(std::move(child_types[0].second), std::move(child_types[1].second)); - - auto struct_reader = - make_uniq(*this, ListType::GetChildType(result_type), s_ele, this_idx, - max_define - 1, max_repeat - 1, std::move(child_readers)); - return make_uniq(*this, result_type, s_ele, this_idx, max_define, max_repeat, - std::move(struct_reader)); - } - if (child_types.size() > 1 || (!is_list && !is_map && !is_repeated)) { - result_type = LogicalType::STRUCT(child_types); - result = make_uniq(*this, result_type, s_ele, this_idx, max_define, max_repeat, - std::move(child_readers)); - } else { - // if we have a struct with only a single type, pull up - result_type = child_types[0].second; - result = std::move(child_readers[0]); - } - if (is_repeated) { - result_type = LogicalType::LIST(result_type); - return make_uniq(*this, result_type, s_ele, this_idx, max_define, max_repeat, - std::move(result)); - } - return result; - } else { // leaf node - if (!s_ele.__isset.type) { - throw InvalidInputException( - "Node has neither num_children nor type set - this violates the Parquet spec (corrupted file)"); - } - if (s_ele.repetition_type == FieldRepetitionType::REPEATED) { - const auto derived_type = DeriveLogicalType(s_ele); - auto list_type = LogicalType::LIST(derived_type); - - auto element_reader = - ColumnReader::CreateReader(*this, derived_type, s_ele, next_file_idx++, max_define, max_repeat); - - return make_uniq(*this, list_type, s_ele, this_idx, max_define, max_repeat, - std::move(element_reader)); - } - // TODO check return value of derive type or should we only do this on read() - return ColumnReader::CreateReader(*this, DeriveLogicalType(s_ele), s_ele, next_file_idx++, max_define, - max_repeat); - } -} - -// TODO we don't need readers for columns we are not going to read ay -unique_ptr ParquetReader::CreateReader() { - auto file_meta_data = GetFileMetadata(); - idx_t next_schema_idx = 0; - idx_t next_file_idx = 0; - - if (file_meta_data->schema.empty()) { - throw IOException("Parquet reader: no schema elements found"); - } - if (file_meta_data->schema[0].num_children == 0) { - throw IOException("Parquet reader: root schema element has no children"); - } - auto ret = CreateReaderRecursive(0, 0, 0, next_schema_idx, next_file_idx); - if (ret->Type().id() != LogicalTypeId::STRUCT) { - throw InvalidInputException("Root element of Parquet file must be a struct"); - } - D_ASSERT(next_schema_idx == file_meta_data->schema.size() - 1); - D_ASSERT(file_meta_data->row_groups.empty() || next_file_idx == file_meta_data->row_groups[0].columns.size()); - - auto &root_struct_reader = ret->Cast(); - // add casts if required - for (auto &entry : reader_data.cast_map) { - auto column_idx = entry.first; - auto &expected_type = entry.second; - auto child_reader = std::move(root_struct_reader.child_readers[column_idx]); - auto cast_reader = make_uniq(std::move(child_reader), expected_type); - root_struct_reader.child_readers[column_idx] = std::move(cast_reader); - } - if (parquet_options.file_row_number) { - root_struct_reader.child_readers.push_back( - make_uniq(*this, LogicalType::BIGINT, SchemaElement(), next_file_idx, 0, 0)); - } - - return ret; -} - -void ParquetReader::InitializeSchema() { - auto file_meta_data = GetFileMetadata(); - - if (file_meta_data->__isset.encryption_algorithm) { - throw FormatException("Encrypted Parquet files are not supported"); - } - // check if we like this schema - if (file_meta_data->schema.size() < 2) { - throw FormatException("Need at least one non-root column in the file"); - } - root_reader = CreateReader(); - auto &root_type = root_reader->Type(); - auto &child_types = StructType::GetChildTypes(root_type); - D_ASSERT(root_type.id() == LogicalTypeId::STRUCT); - for (auto &type_pair : child_types) { - names.push_back(type_pair.first); - return_types.push_back(type_pair.second); - } - - // Add generated constant column for row number - if (parquet_options.file_row_number) { - if (std::find(names.begin(), names.end(), "file_row_number") != names.end()) { - throw BinderException( - "Using file_row_number option on file with column named file_row_number is not supported"); - } - return_types.emplace_back(LogicalType::BIGINT); - names.emplace_back("file_row_number"); - } -} - -ParquetOptions::ParquetOptions(ClientContext &context) { - Value binary_as_string_val; - if (context.TryGetCurrentSetting("binary_as_string", binary_as_string_val)) { - binary_as_string = binary_as_string_val.GetValue(); - } -} - -ParquetReader::ParquetReader(ClientContext &context_p, string file_name_p, ParquetOptions parquet_options_p) - : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)), - parquet_options(std::move(parquet_options_p)) { - file_name = std::move(file_name_p); - file_handle = fs.OpenFile(file_name, FileFlags::FILE_FLAGS_READ); - if (!file_handle->CanSeek()) { - throw NotImplementedException( - "Reading parquet files from a FIFO stream is not supported and cannot be efficiently supported since " - "metadata is located at the end of the file. Write the stream to disk first and read from there instead."); - } - // If object cached is disabled - // or if this file has cached metadata - // or if the cached version already expired - if (!ObjectCache::ObjectCacheEnabled(context_p)) { - metadata = LoadMetadata(allocator, *file_handle); - } else { - auto last_modify_time = fs.GetLastModifiedTime(*file_handle); - metadata = ObjectCache::GetObjectCache(context_p).Get(file_name); - if (!metadata || (last_modify_time + 10 >= metadata->read_time)) { - metadata = LoadMetadata(allocator, *file_handle); - ObjectCache::GetObjectCache(context_p).Put(file_name, metadata); - } - } - InitializeSchema(); -} - -ParquetReader::ParquetReader(ClientContext &context_p, ParquetOptions parquet_options_p, - shared_ptr metadata_p) - : fs(FileSystem::GetFileSystem(context_p)), allocator(BufferAllocator::Get(context_p)), - metadata(std::move(metadata_p)), parquet_options(std::move(parquet_options_p)) { - InitializeSchema(); -} - -ParquetReader::~ParquetReader() { -} - -const FileMetaData *ParquetReader::GetFileMetadata() { - D_ASSERT(metadata); - D_ASSERT(metadata->metadata); - return metadata->metadata.get(); -} - -unique_ptr ParquetReader::ReadStatistics(const string &name) { - idx_t file_col_idx; - for (file_col_idx = 0; file_col_idx < names.size(); file_col_idx++) { - if (names[file_col_idx] == name) { - break; - } - } - if (file_col_idx == names.size()) { - return nullptr; - } - - unique_ptr column_stats; - auto file_meta_data = GetFileMetadata(); - auto column_reader = root_reader->Cast().GetChildReader(file_col_idx); - - for (idx_t row_group_idx = 0; row_group_idx < file_meta_data->row_groups.size(); row_group_idx++) { - auto &row_group = file_meta_data->row_groups[row_group_idx]; - auto chunk_stats = column_reader->Stats(row_group_idx, row_group.columns); - if (!chunk_stats) { - return nullptr; - } - if (!column_stats) { - column_stats = std::move(chunk_stats); - } else { - column_stats->Merge(*chunk_stats); - } - } - return column_stats; -} - -const ParquetRowGroup &ParquetReader::GetGroup(ParquetReaderScanState &state) { - auto file_meta_data = GetFileMetadata(); - D_ASSERT(state.current_group >= 0 && (idx_t)state.current_group < state.group_idx_list.size()); - D_ASSERT(state.group_idx_list[state.current_group] >= 0 && - state.group_idx_list[state.current_group] < file_meta_data->row_groups.size()); - return file_meta_data->row_groups[state.group_idx_list[state.current_group]]; -} - -uint64_t ParquetReader::GetGroupCompressedSize(ParquetReaderScanState &state) { - auto &group = GetGroup(state); - auto total_compressed_size = group.total_compressed_size; - - idx_t calc_compressed_size = 0; - - // If the global total_compressed_size is not set, we can still calculate it - if (group.total_compressed_size == 0) { - for (auto &column_chunk : group.columns) { - calc_compressed_size += column_chunk.meta_data.total_compressed_size; - } - } - - if (total_compressed_size != 0 && calc_compressed_size != 0 && - (idx_t)total_compressed_size != calc_compressed_size) { - throw InvalidInputException("mismatch between calculated compressed size and reported compressed size"); - } - - return total_compressed_size ? total_compressed_size : calc_compressed_size; -} - -uint64_t ParquetReader::GetGroupSpan(ParquetReaderScanState &state) { - auto &group = GetGroup(state); - idx_t min_offset = NumericLimits::Maximum(); - idx_t max_offset = NumericLimits::Minimum(); - - for (auto &column_chunk : group.columns) { - - // Set the min offset - idx_t current_min_offset = NumericLimits::Maximum(); - if (column_chunk.meta_data.__isset.dictionary_page_offset) { - current_min_offset = MinValue(current_min_offset, column_chunk.meta_data.dictionary_page_offset); - } - if (column_chunk.meta_data.__isset.index_page_offset) { - current_min_offset = MinValue(current_min_offset, column_chunk.meta_data.index_page_offset); - } - current_min_offset = MinValue(current_min_offset, column_chunk.meta_data.data_page_offset); - min_offset = MinValue(current_min_offset, min_offset); - max_offset = MaxValue(max_offset, column_chunk.meta_data.total_compressed_size + current_min_offset); - } - - return max_offset - min_offset; -} - -idx_t ParquetReader::GetGroupOffset(ParquetReaderScanState &state) { - auto &group = GetGroup(state); - idx_t min_offset = NumericLimits::Maximum(); - - for (auto &column_chunk : group.columns) { - if (column_chunk.meta_data.__isset.dictionary_page_offset) { - min_offset = MinValue(min_offset, column_chunk.meta_data.dictionary_page_offset); - } - if (column_chunk.meta_data.__isset.index_page_offset) { - min_offset = MinValue(min_offset, column_chunk.meta_data.index_page_offset); - } - min_offset = MinValue(min_offset, column_chunk.meta_data.data_page_offset); - } - - return min_offset; -} - -void ParquetReader::PrepareRowGroupBuffer(ParquetReaderScanState &state, idx_t col_idx) { - auto &group = GetGroup(state); - auto column_id = reader_data.column_ids[col_idx]; - auto column_reader = state.root_reader->Cast().GetChildReader(column_id); - - // TODO move this to columnreader too - if (reader_data.filters) { - auto stats = column_reader->Stats(state.group_idx_list[state.current_group], group.columns); - // filters contain output chunk index, not file col idx! - auto global_id = reader_data.column_mapping[col_idx]; - auto filter_entry = reader_data.filters->filters.find(global_id); - if (stats && filter_entry != reader_data.filters->filters.end()) { - bool skip_chunk = false; - auto &filter = *filter_entry->second; - auto prune_result = filter.CheckStatistics(*stats); - if (prune_result == FilterPropagateResult::FILTER_ALWAYS_FALSE) { - skip_chunk = true; - } - if (skip_chunk) { - // this effectively will skip this chunk - state.group_offset = group.num_rows; - return; - } - } - } - - state.root_reader->InitializeRead(state.group_idx_list[state.current_group], group.columns, - *state.thrift_file_proto); -} - -idx_t ParquetReader::NumRows() { - return GetFileMetadata()->num_rows; -} - -idx_t ParquetReader::NumRowGroups() { - return GetFileMetadata()->row_groups.size(); -} - -void ParquetReader::InitializeScan(ParquetReaderScanState &state, vector groups_to_read) { - state.current_group = -1; - state.finished = false; - state.group_offset = 0; - state.group_idx_list = std::move(groups_to_read); - state.sel.Initialize(STANDARD_VECTOR_SIZE); - if (!state.file_handle || state.file_handle->path != file_handle->path) { - auto flags = FileFlags::FILE_FLAGS_READ; - - if (!file_handle->OnDiskFile() && file_handle->CanSeek()) { - state.prefetch_mode = true; - flags |= FileFlags::FILE_FLAGS_DIRECT_IO; - } else { - state.prefetch_mode = false; - } - - state.file_handle = fs.OpenFile(file_handle->path, flags); - } - - state.thrift_file_proto = CreateThriftProtocol(allocator, *state.file_handle, state.prefetch_mode); - state.root_reader = CreateReader(); - state.define_buf.resize(allocator, STANDARD_VECTOR_SIZE); - state.repeat_buf.resize(allocator, STANDARD_VECTOR_SIZE); -} - -void FilterIsNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) { - if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) { - auto &mask = ConstantVector::Validity(v); - if (mask.RowIsValid(0)) { - filter_mask.reset(); - } - return; - } - D_ASSERT(v.GetVectorType() == VectorType::FLAT_VECTOR); - - auto &mask = FlatVector::Validity(v); - if (mask.AllValid()) { - filter_mask.reset(); - } else { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && !mask.RowIsValid(i); - } - } -} - -void FilterIsNotNull(Vector &v, parquet_filter_t &filter_mask, idx_t count) { - if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) { - auto &mask = ConstantVector::Validity(v); - if (!mask.RowIsValid(0)) { - filter_mask.reset(); - } - return; - } - D_ASSERT(v.GetVectorType() == VectorType::FLAT_VECTOR); - - auto &mask = FlatVector::Validity(v); - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && mask.RowIsValid(i); - } - } -} - -template -void TemplatedFilterOperation(Vector &v, T constant, parquet_filter_t &filter_mask, idx_t count) { - if (v.GetVectorType() == VectorType::CONSTANT_VECTOR) { - auto v_ptr = ConstantVector::GetData(v); - auto &mask = ConstantVector::Validity(v); - - if (mask.RowIsValid(0)) { - if (!OP::Operation(v_ptr[0], constant)) { - filter_mask.reset(); - } - } - return; - } - - D_ASSERT(v.GetVectorType() == VectorType::FLAT_VECTOR); - auto v_ptr = FlatVector::GetData(v); - auto &mask = FlatVector::Validity(v); - - if (!mask.AllValid()) { - for (idx_t i = 0; i < count; i++) { - if (mask.RowIsValid(i)) { - filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); - } - } - } else { - for (idx_t i = 0; i < count; i++) { - filter_mask[i] = filter_mask[i] && OP::Operation(v_ptr[i], constant); - } - } -} - -template -void TemplatedFilterOperation(Vector &v, const Value &constant, parquet_filter_t &filter_mask, idx_t count) { - TemplatedFilterOperation(v, constant.template GetValueUnsafe(), filter_mask, count); -} - -template -static void FilterOperationSwitch(Vector &v, Value &constant, parquet_filter_t &filter_mask, idx_t count) { - if (filter_mask.none() || count == 0) { - return; - } - switch (v.GetType().InternalType()) { - case PhysicalType::BOOL: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT8: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT16: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT32: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::UINT64: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT8: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT16: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT32: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT64: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::INT128: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::FLOAT: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::DOUBLE: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - case PhysicalType::VARCHAR: - TemplatedFilterOperation(v, constant, filter_mask, count); - break; - default: - throw NotImplementedException("Unsupported type for filter %s", v.ToString()); - } -} - -static void ApplyFilter(Vector &v, TableFilter &filter, parquet_filter_t &filter_mask, idx_t count) { - switch (filter.filter_type) { - case TableFilterType::CONJUNCTION_AND: { - auto &conjunction = filter.Cast(); - for (auto &child_filter : conjunction.child_filters) { - ApplyFilter(v, *child_filter, filter_mask, count); - } - break; - } - case TableFilterType::CONJUNCTION_OR: { - auto &conjunction = filter.Cast(); - parquet_filter_t or_mask; - for (auto &child_filter : conjunction.child_filters) { - parquet_filter_t child_mask = filter_mask; - ApplyFilter(v, *child_filter, child_mask, count); - or_mask |= child_mask; - } - filter_mask &= or_mask; - break; - } - case TableFilterType::CONSTANT_COMPARISON: { - auto &constant_filter = filter.Cast(); - switch (constant_filter.comparison_type) { - case ExpressionType::COMPARE_EQUAL: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_LESSTHAN: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_LESSTHANOREQUALTO: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_GREATERTHAN: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - case ExpressionType::COMPARE_GREATERTHANOREQUALTO: - FilterOperationSwitch(v, constant_filter.constant, filter_mask, count); - break; - default: - D_ASSERT(0); - } - break; - } - case TableFilterType::IS_NOT_NULL: - FilterIsNotNull(v, filter_mask, count); - break; - case TableFilterType::IS_NULL: - FilterIsNull(v, filter_mask, count); - break; - default: - D_ASSERT(0); - break; - } -} - -void ParquetReader::Scan(ParquetReaderScanState &state, DataChunk &result) { - while (ScanInternal(state, result)) { - if (result.size() > 0) { - break; - } - result.Reset(); - } -} - -bool ParquetReader::ScanInternal(ParquetReaderScanState &state, DataChunk &result) { - if (state.finished) { - return false; - } - - // see if we have to switch to the next row group in the parquet file - if (state.current_group < 0 || (int64_t)state.group_offset >= GetGroup(state).num_rows) { - state.current_group++; - state.group_offset = 0; - - auto &trans = reinterpret_cast(*state.thrift_file_proto->getTransport()); - trans.ClearPrefetch(); - state.current_group_prefetched = false; - - if ((idx_t)state.current_group == state.group_idx_list.size()) { - state.finished = true; - return false; - } - - uint64_t to_scan_compressed_bytes = 0; - for (idx_t col_idx = 0; col_idx < reader_data.column_ids.size(); col_idx++) { - PrepareRowGroupBuffer(state, col_idx); - - auto file_col_idx = reader_data.column_ids[col_idx]; - - auto &root_reader = state.root_reader->Cast(); - to_scan_compressed_bytes += root_reader.GetChildReader(file_col_idx)->TotalCompressedSize(); - } - - auto &group = GetGroup(state); - if (state.prefetch_mode && state.group_offset != (idx_t)group.num_rows) { - - uint64_t total_row_group_span = GetGroupSpan(state); - - double scan_percentage = (double)(to_scan_compressed_bytes) / total_row_group_span; - - if (to_scan_compressed_bytes > total_row_group_span) { - throw InvalidInputException( - "Malformed parquet file: sum of total compressed bytes of columns seems incorrect"); - } - - if (!reader_data.filters && - scan_percentage > ParquetReaderPrefetchConfig::WHOLE_GROUP_PREFETCH_MINIMUM_SCAN) { - // Prefetch the whole row group - if (!state.current_group_prefetched) { - auto total_compressed_size = GetGroupCompressedSize(state); - if (total_compressed_size > 0) { - trans.Prefetch(GetGroupOffset(state), total_row_group_span); - } - state.current_group_prefetched = true; - } - } else { - // lazy fetching is when all tuples in a column can be skipped. With lazy fetching the buffer is only - // fetched on the first read to that buffer. - bool lazy_fetch = reader_data.filters; - - // Prefetch column-wise - for (idx_t col_idx = 0; col_idx < reader_data.column_ids.size(); col_idx++) { - auto file_col_idx = reader_data.column_ids[col_idx]; - auto &root_reader = state.root_reader->Cast(); - - bool has_filter = false; - if (reader_data.filters) { - auto entry = reader_data.filters->filters.find(reader_data.column_mapping[col_idx]); - has_filter = entry != reader_data.filters->filters.end(); - } - root_reader.GetChildReader(file_col_idx)->RegisterPrefetch(trans, !(lazy_fetch && !has_filter)); - } - - trans.FinalizeRegistration(); - - if (!lazy_fetch) { - trans.PrefetchRegistered(); - } - } - } - return true; - } - - auto this_output_chunk_rows = MinValue(STANDARD_VECTOR_SIZE, GetGroup(state).num_rows - state.group_offset); - result.SetCardinality(this_output_chunk_rows); - - if (this_output_chunk_rows == 0) { - state.finished = true; - return false; // end of last group, we are done - } - - // we evaluate simple table filters directly in this scan so we can skip decoding column data that's never going to - // be relevant - parquet_filter_t filter_mask; - filter_mask.set(); - - // mask out unused part of bitset - for (idx_t i = this_output_chunk_rows; i < STANDARD_VECTOR_SIZE; i++) { - filter_mask.set(i, false); - } - - state.define_buf.zero(); - state.repeat_buf.zero(); - - auto define_ptr = (uint8_t *)state.define_buf.ptr; - auto repeat_ptr = (uint8_t *)state.repeat_buf.ptr; - - auto &root_reader = state.root_reader->Cast(); - - if (reader_data.filters) { - vector need_to_read(reader_data.column_ids.size(), true); - - // first load the columns that are used in filters - for (auto &filter_col : reader_data.filters->filters) { - if (filter_mask.none()) { - // if no rows are left we can stop checking filters - break; - } - auto filter_entry = reader_data.filter_map[filter_col.first]; - if (filter_entry.is_constant) { - // this is a constant vector, look for the constant - auto &constant = reader_data.constant_map[filter_entry.index].value; - Vector constant_vector(constant); - ApplyFilter(constant_vector, *filter_col.second, filter_mask, this_output_chunk_rows); - } else { - auto id = filter_entry.index; - auto file_col_idx = reader_data.column_ids[id]; - auto result_idx = reader_data.column_mapping[id]; - - auto &result_vector = result.data[result_idx]; - auto child_reader = root_reader.GetChildReader(file_col_idx); - child_reader->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result_vector); - need_to_read[id] = false; - - ApplyFilter(result_vector, *filter_col.second, filter_mask, this_output_chunk_rows); - } - } - - // we still may have to read some cols - for (idx_t col_idx = 0; col_idx < reader_data.column_ids.size(); col_idx++) { - if (!need_to_read[col_idx]) { - continue; - } - auto file_col_idx = reader_data.column_ids[col_idx]; - if (filter_mask.none()) { - root_reader.GetChildReader(file_col_idx)->Skip(result.size()); - continue; - } - auto &result_vector = result.data[reader_data.column_mapping[col_idx]]; - auto child_reader = root_reader.GetChildReader(file_col_idx); - child_reader->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result_vector); - } - - idx_t sel_size = 0; - for (idx_t i = 0; i < this_output_chunk_rows; i++) { - if (filter_mask[i]) { - state.sel.set_index(sel_size++, i); - } - } - - result.Slice(state.sel, sel_size); - } else { - for (idx_t col_idx = 0; col_idx < reader_data.column_ids.size(); col_idx++) { - auto file_col_idx = reader_data.column_ids[col_idx]; - auto &result_vector = result.data[reader_data.column_mapping[col_idx]]; - auto child_reader = root_reader.GetChildReader(file_col_idx); - auto rows_read = child_reader->Read(result.size(), filter_mask, define_ptr, repeat_ptr, result_vector); - if (rows_read != result.size()) { - throw InvalidInputException("Mismatch in parquet read for column %llu, expected %llu rows, got %llu", - file_col_idx, result.size(), rows_read); - } - } - } - - state.group_offset += this_output_chunk_rows; - return true; -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_statistics.cpp b/src/duckdb/extension/parquet/parquet_statistics.cpp deleted file mode 100644 index 3eef0d165..000000000 --- a/src/duckdb/extension/parquet/parquet_statistics.cpp +++ /dev/null @@ -1,310 +0,0 @@ -#include "parquet_statistics.hpp" - -#include "duckdb.hpp" -#include "parquet_decimal_utils.hpp" -#include "parquet_timestamp.hpp" -#include "string_column_reader.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/blob.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/value.hpp" -#endif - -namespace duckdb { - -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Type; - -static unique_ptr CreateNumericStats(const LogicalType &type, - const duckdb_parquet::format::SchemaElement &schema_ele, - const duckdb_parquet::format::Statistics &parquet_stats) { - auto stats = NumericStats::CreateUnknown(type); - - // for reasons unknown to science, Parquet defines *both* `min` and `min_value` as well as `max` and - // `max_value`. All are optional. such elegance. - Value min; - Value max; - if (parquet_stats.__isset.min) { - min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min).DefaultCastAs(type); - } else if (parquet_stats.__isset.min_value) { - min = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.min_value).DefaultCastAs(type); - } else { - min = Value(type); - } - if (parquet_stats.__isset.max) { - max = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.max).DefaultCastAs(type); - } else if (parquet_stats.__isset.max_value) { - max = ParquetStatisticsUtils::ConvertValue(type, schema_ele, parquet_stats.max_value).DefaultCastAs(type); - } else { - max = Value(type); - } - NumericStats::SetMin(stats, min); - NumericStats::SetMax(stats, max); - return stats.ToUnique(); -} - -Value ParquetStatisticsUtils::ConvertValue(const LogicalType &type, - const duckdb_parquet::format::SchemaElement &schema_ele, - const std::string &stats) { - if (stats.empty()) { - return Value(); - } - auto stats_data = const_data_ptr_cast(stats.c_str()); - switch (type.id()) { - case LogicalTypeId::BOOLEAN: { - if (stats.size() != sizeof(bool)) { - throw InternalException("Incorrect stats size for type BOOLEAN"); - } - return Value::BOOLEAN(Load(stats_data)); - } - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - if (stats.size() != sizeof(uint32_t)) { - throw InternalException("Incorrect stats size for type UINTEGER"); - } - return Value::UINTEGER(Load(stats_data)); - case LogicalTypeId::UBIGINT: - if (stats.size() != sizeof(uint64_t)) { - throw InternalException("Incorrect stats size for type UBIGINT"); - } - return Value::UBIGINT(Load(stats_data)); - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - if (stats.size() != sizeof(int32_t)) { - throw InternalException("Incorrect stats size for type INTEGER"); - } - return Value::INTEGER(Load(stats_data)); - case LogicalTypeId::BIGINT: - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type BIGINT"); - } - return Value::BIGINT(Load(stats_data)); - case LogicalTypeId::FLOAT: { - if (stats.size() != sizeof(float)) { - throw InternalException("Incorrect stats size for type FLOAT"); - } - auto val = Load(stats_data); - if (!Value::FloatIsFinite(val)) { - return Value(); - } - return Value::FLOAT(val); - } - case LogicalTypeId::DOUBLE: { - if (stats.size() != sizeof(double)) { - throw InternalException("Incorrect stats size for type DOUBLE"); - } - auto val = Load(stats_data); - if (!Value::DoubleIsFinite(val)) { - return Value(); - } - return Value::DOUBLE(val); - } - case LogicalTypeId::DECIMAL: { - auto width = DecimalType::GetWidth(type); - auto scale = DecimalType::GetScale(type); - switch (schema_ele.type) { - case Type::INT32: { - if (stats.size() != sizeof(int32_t)) { - throw InternalException("Incorrect stats size for type %s", type.ToString()); - } - return Value::DECIMAL(Load(stats_data), width, scale); - } - case Type::INT64: { - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type %s", type.ToString()); - } - return Value::DECIMAL(Load(stats_data), width, scale); - } - case Type::BYTE_ARRAY: - case Type::FIXED_LEN_BYTE_ARRAY: - if (stats.size() > GetTypeIdSize(type.InternalType())) { - throw InternalException("Incorrect stats size for type %s", type.ToString()); - } - switch (type.InternalType()) { - case PhysicalType::INT16: - return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue(stats_data, stats.size()), width, - scale); - case PhysicalType::INT32: - return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue(stats_data, stats.size()), width, - scale); - case PhysicalType::INT64: - return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue(stats_data, stats.size()), width, - scale); - case PhysicalType::INT128: - return Value::DECIMAL(ParquetDecimalUtils::ReadDecimalValue(stats_data, stats.size()), width, - scale); - default: - throw InternalException("Unsupported internal type for decimal"); - } - default: - throw InternalException("Unsupported internal type for decimal?.."); - } - } - case LogicalType::VARCHAR: - case LogicalType::BLOB: - if (Value::StringIsValid(stats)) { - return Value(stats); - } else { - return Value(Blob::ToString(string_t(stats))); - } - case LogicalTypeId::DATE: - if (stats.size() != sizeof(int32_t)) { - throw InternalException("Incorrect stats size for type DATE"); - } - return Value::DATE(date_t(Load(stats_data))); - case LogicalTypeId::TIME: { - int64_t val; - if (stats.size() == sizeof(int32_t)) { - val = Load(stats_data); - } else if (stats.size() == sizeof(int64_t)) { - val = Load(stats_data); - } else { - throw InternalException("Incorrect stats size for type TIME"); - } - if (schema_ele.__isset.logicalType && schema_ele.logicalType.__isset.TIME) { - // logical type - if (schema_ele.logicalType.TIME.unit.__isset.MILLIS) { - return Value::TIME(Time::FromTimeMs(val)); - } else if (schema_ele.logicalType.TIME.unit.__isset.NANOS) { - return Value::TIME(Time::FromTimeNs(val)); - } else if (schema_ele.logicalType.TIME.unit.__isset.MICROS) { - return Value::TIME(dtime_t(val)); - } else { - throw InternalException("Time logicalType is set but unit is not defined"); - } - } - if (schema_ele.converted_type == duckdb_parquet::format::ConvertedType::TIME_MILLIS) { - return Value::TIME(Time::FromTimeMs(val)); - } else { - return Value::TIME(dtime_t(val)); - } - } - case LogicalTypeId::TIME_TZ: { - int64_t val; - if (stats.size() == sizeof(int64_t)) { - val = Load(stats_data); - } else { - throw InternalException("Incorrect stats size for type TIMETZ"); - } - if (schema_ele.__isset.logicalType && schema_ele.logicalType.__isset.TIME) { - // logical type - if (schema_ele.logicalType.TIME.unit.__isset.MICROS) { - return Value::TIMETZ(ParquetIntToTimeTZ(val)); - } else { - throw InternalException("Time With Time Zone logicalType is set but unit is not defined"); - } - } - return Value::TIMETZ(ParquetIntToTimeTZ(val)); - } - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_TZ: { - if (schema_ele.type == Type::INT96) { - if (stats.size() != sizeof(Int96)) { - throw InternalException("Incorrect stats size for type TIMESTAMP"); - } - return Value::TIMESTAMP(ImpalaTimestampToTimestamp(Load(stats_data))); - } else { - D_ASSERT(schema_ele.type == Type::INT64); - if (stats.size() != sizeof(int64_t)) { - throw InternalException("Incorrect stats size for type TIMESTAMP"); - } - auto val = Load(stats_data); - if (schema_ele.__isset.logicalType && schema_ele.logicalType.__isset.TIMESTAMP) { - // logical type - if (schema_ele.logicalType.TIMESTAMP.unit.__isset.MILLIS) { - return Value::TIMESTAMPMS(timestamp_t(val)); - } else if (schema_ele.logicalType.TIMESTAMP.unit.__isset.NANOS) { - return Value::TIMESTAMPNS(timestamp_t(val)); - } else if (schema_ele.logicalType.TIMESTAMP.unit.__isset.MICROS) { - return Value::TIMESTAMP(timestamp_t(val)); - } else { - throw InternalException("Timestamp logicalType is set but unit is not defined"); - } - } - if (schema_ele.converted_type == duckdb_parquet::format::ConvertedType::TIMESTAMP_MILLIS) { - return Value::TIMESTAMPMS(timestamp_t(val)); - } else { - return Value::TIMESTAMP(timestamp_t(val)); - } - } - } - default: - throw InternalException("Unsupported type for stats %s", type.ToString()); - } -} - -unique_ptr ParquetStatisticsUtils::TransformColumnStatistics(const SchemaElement &s_ele, - const LogicalType &type, - const ColumnChunk &column_chunk) { - if (!column_chunk.__isset.meta_data || !column_chunk.meta_data.__isset.statistics) { - // no stats present for row group - return nullptr; - } - auto &parquet_stats = column_chunk.meta_data.statistics; - unique_ptr row_group_stats; - - switch (type.id()) { - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - case LogicalTypeId::UBIGINT: - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::BIGINT: - case LogicalTypeId::FLOAT: - case LogicalTypeId::DOUBLE: - case LogicalTypeId::DATE: - case LogicalTypeId::TIME: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_SEC: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::DECIMAL: - row_group_stats = CreateNumericStats(type, s_ele, parquet_stats); - break; - case LogicalTypeId::VARCHAR: { - auto string_stats = StringStats::CreateEmpty(type); - if (parquet_stats.__isset.min) { - StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true); - StringStats::Update(string_stats, parquet_stats.min); - } else if (parquet_stats.__isset.min_value) { - StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true); - StringStats::Update(string_stats, parquet_stats.min_value); - } else { - return nullptr; - } - if (parquet_stats.__isset.max) { - StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true); - StringStats::Update(string_stats, parquet_stats.max); - } else if (parquet_stats.__isset.max_value) { - StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true); - StringStats::Update(string_stats, parquet_stats.max_value); - } else { - return nullptr; - } - StringStats::SetContainsUnicode(string_stats); - StringStats::ResetMaxStringLength(string_stats); - row_group_stats = string_stats.ToUnique(); - break; - } - default: - // no stats for you - break; - } // end of type switch - - // null count is generic - if (!row_group_stats) { - // if stats are missing from any row group we know squat - return nullptr; - } - row_group_stats->Set(StatsInfo::CAN_HAVE_NULL_AND_VALID_VALUES); - if (parquet_stats.__isset.null_count && parquet_stats.null_count == 0) { - row_group_stats->Set(StatsInfo::CANNOT_HAVE_NULL_VALUES); - } - return row_group_stats; -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_timestamp.cpp b/src/duckdb/extension/parquet/parquet_timestamp.cpp deleted file mode 100644 index 08cf021a6..000000000 --- a/src/duckdb/extension/parquet/parquet_timestamp.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#include "parquet_timestamp.hpp" - -#include "duckdb.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/types/date.hpp" -#include "duckdb/common/types/time.hpp" -#include "duckdb/common/types/timestamp.hpp" -#endif - -namespace duckdb { - -// surely they are joking -static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL; -static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL; -static constexpr int64_t MICROSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL; -static constexpr int64_t NANOSECONDS_PER_MICRO = 1000LL; - -static int64_t ImpalaTimestampToMicroseconds(const Int96 &impala_timestamp) { - int64_t days_since_epoch = impala_timestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS; - auto nanoseconds = Load(const_data_ptr_cast(impala_timestamp.value)); - auto microseconds = nanoseconds / NANOSECONDS_PER_MICRO; - return days_since_epoch * MICROSECONDS_PER_DAY + microseconds; -} - -timestamp_t ImpalaTimestampToTimestamp(const Int96 &raw_ts) { - auto impala_us = ImpalaTimestampToMicroseconds(raw_ts); - return Timestamp::FromEpochMicroSeconds(impala_us); -} - -Int96 TimestampToImpalaTimestamp(timestamp_t &ts) { - int32_t hour, min, sec, msec; - Time::Convert(Timestamp::GetTime(ts), hour, min, sec, msec); - uint64_t ms_since_midnight = hour * 60 * 60 * 1000 + min * 60 * 1000 + sec * 1000 + msec; - auto days_since_epoch = Date::Epoch(Timestamp::GetDate(ts)) / int64_t(24 * 60 * 60); - // first two uint32 in Int96 are nanoseconds since midnights - // last uint32 is number of days since year 4713 BC ("Julian date") - Int96 impala_ts; - Store(ms_since_midnight * 1000000, data_ptr_cast(impala_ts.value)); - impala_ts.value[2] = days_since_epoch + JULIAN_TO_UNIX_EPOCH_DAYS; - return impala_ts; -} - -timestamp_t ParquetTimestampMicrosToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochMicroSeconds(raw_ts); -} -timestamp_t ParquetTimestampMsToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochMs(raw_ts); -} -timestamp_t ParquetTimestampNsToTimestamp(const int64_t &raw_ts) { - return Timestamp::FromEpochNanoSeconds(raw_ts); -} - -date_t ParquetIntToDate(const int32_t &raw_date) { - return date_t(raw_date); -} - -dtime_t ParquetIntToTimeMs(const int32_t &raw_time) { - return Time::FromTimeMs(raw_time); -} - -dtime_t ParquetIntToTime(const int64_t &raw_time) { - return dtime_t(raw_time); -} - -dtime_t ParquetIntToTimeNs(const int64_t &raw_time) { - return Time::FromTimeNs(raw_time); -} - -dtime_tz_t ParquetIntToTimeTZ(const int64_t &raw_time) { - dtime_tz_t result; - result.bits = raw_time; - return result; -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/parquet_writer.cpp b/src/duckdb/extension/parquet/parquet_writer.cpp deleted file mode 100644 index 8d9d3fc11..000000000 --- a/src/duckdb/extension/parquet/parquet_writer.cpp +++ /dev/null @@ -1,455 +0,0 @@ -#include "parquet_writer.hpp" - -#include "duckdb.hpp" -#include "parquet_timestamp.hpp" -#ifndef DUCKDB_AMALGAMATION -#include "duckdb/common/file_system.hpp" -#include "duckdb/common/serializer/buffered_file_writer.hpp" -#include "duckdb/common/string_util.hpp" -#include "duckdb/function/table_function.hpp" -#include "duckdb/main/client_context.hpp" -#include "duckdb/main/connection.hpp" -#include "duckdb/parser/parsed_data/create_copy_function_info.hpp" -#include "duckdb/parser/parsed_data/create_table_function_info.hpp" -#endif - -namespace duckdb { - -using namespace duckdb_apache::thrift; // NOLINT -using namespace duckdb_apache::thrift::protocol; // NOLINT -using namespace duckdb_apache::thrift::transport; // NOLINT - -using duckdb_parquet::format::CompressionCodec; -using duckdb_parquet::format::ConvertedType; -using duckdb_parquet::format::Encoding; -using duckdb_parquet::format::FieldRepetitionType; -using duckdb_parquet::format::FileMetaData; -using duckdb_parquet::format::PageHeader; -using duckdb_parquet::format::PageType; -using ParquetRowGroup = duckdb_parquet::format::RowGroup; -using duckdb_parquet::format::Type; - -ChildFieldIDs::ChildFieldIDs() { - ids = make_uniq>(); -} - -ChildFieldIDs ChildFieldIDs::Copy() const { - ChildFieldIDs result; - for (const auto &id : *ids) { - result.ids->emplace(id.first, id.second.Copy()); - } - return result; -} - -FieldID::FieldID() : set(false) { -} - -FieldID::FieldID(int32_t field_id_p) : set(true), field_id(field_id_p) { -} - -FieldID FieldID::Copy() const { - auto result = set ? FieldID(field_id) : FieldID(); - result.child_field_ids = child_field_ids.Copy(); - return result; -} - -class MyTransport : public TTransport { -public: - explicit MyTransport(Serializer &serializer) : serializer(serializer) { - } - - bool isOpen() const override { - return true; - } - - void open() override { - } - - void close() override { - } - - void write_virt(const uint8_t *buf, uint32_t len) override { - serializer.WriteData(const_data_ptr_cast(buf), len); - } - -private: - Serializer &serializer; -}; - -bool ParquetWriter::DuckDBTypeToParquetTypeInternal(const LogicalType &duckdb_type, Type::type &parquet_type) { - switch (duckdb_type.id()) { - case LogicalTypeId::BOOLEAN: - parquet_type = Type::BOOLEAN; - break; - case LogicalTypeId::TINYINT: - case LogicalTypeId::SMALLINT: - case LogicalTypeId::INTEGER: - case LogicalTypeId::DATE: - parquet_type = Type::INT32; - break; - case LogicalTypeId::BIGINT: - parquet_type = Type::INT64; - break; - case LogicalTypeId::FLOAT: - parquet_type = Type::FLOAT; - break; - case LogicalTypeId::DOUBLE: - case LogicalTypeId::HUGEINT: - parquet_type = Type::DOUBLE; - break; - case LogicalTypeId::ENUM: - case LogicalTypeId::BLOB: - case LogicalTypeId::VARCHAR: - parquet_type = Type::BYTE_ARRAY; - break; - case LogicalTypeId::TIME: - case LogicalTypeId::TIME_TZ: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIMESTAMP_MS: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_SEC: - parquet_type = Type::INT64; - break; - case LogicalTypeId::UTINYINT: - case LogicalTypeId::USMALLINT: - case LogicalTypeId::UINTEGER: - parquet_type = Type::INT32; - break; - case LogicalTypeId::UBIGINT: - parquet_type = Type::INT64; - break; - case LogicalTypeId::INTERVAL: - case LogicalTypeId::UUID: - parquet_type = Type::FIXED_LEN_BYTE_ARRAY; - break; - case LogicalTypeId::DECIMAL: - switch (duckdb_type.InternalType()) { - case PhysicalType::INT16: - case PhysicalType::INT32: - parquet_type = Type::INT32; - break; - case PhysicalType::INT64: - parquet_type = Type::INT64; - break; - case PhysicalType::INT128: - parquet_type = Type::FIXED_LEN_BYTE_ARRAY; - break; - default: - throw InternalException("Unsupported internal decimal type"); - } - break; - default: - // Anything that is not supported returns false - return false; - } - return true; -} - -Type::type ParquetWriter::DuckDBTypeToParquetType(const LogicalType &duckdb_type) { - Type::type result; - if (!DuckDBTypeToParquetTypeInternal(duckdb_type, result)) { - throw NotImplementedException("Unimplemented type for Parquet \"%s\"", duckdb_type.ToString()); - } - return result; -} - -bool ParquetWriter::TypeIsSupported(const LogicalType &type) { - Type::type unused; - auto id = type.id(); - if (id == LogicalTypeId::LIST) { - auto &child_type = ListType::GetChildType(type); - return TypeIsSupported(child_type); - } - if (id == LogicalTypeId::STRUCT) { - auto &children = StructType::GetChildTypes(type); - for (auto &child : children) { - auto &child_type = child.second; - if (!TypeIsSupported(child_type)) { - return false; - } - } - return true; - } - if (id == LogicalTypeId::MAP) { - auto &key_type = MapType::KeyType(type); - auto &value_type = MapType::ValueType(type); - if (!TypeIsSupported(key_type)) { - return false; - } - if (!TypeIsSupported(value_type)) { - return false; - } - return true; - } - return DuckDBTypeToParquetTypeInternal(type, unused); -} - -void ParquetWriter::SetSchemaProperties(const LogicalType &duckdb_type, - duckdb_parquet::format::SchemaElement &schema_ele) { - switch (duckdb_type.id()) { - case LogicalTypeId::TINYINT: - schema_ele.converted_type = ConvertedType::INT_8; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::SMALLINT: - schema_ele.converted_type = ConvertedType::INT_16; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::INTEGER: - schema_ele.converted_type = ConvertedType::INT_32; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::BIGINT: - schema_ele.converted_type = ConvertedType::INT_64; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UTINYINT: - schema_ele.converted_type = ConvertedType::UINT_8; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::USMALLINT: - schema_ele.converted_type = ConvertedType::UINT_16; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UINTEGER: - schema_ele.converted_type = ConvertedType::UINT_32; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UBIGINT: - schema_ele.converted_type = ConvertedType::UINT_64; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::DATE: - schema_ele.converted_type = ConvertedType::DATE; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::TIME_TZ: - case LogicalTypeId::TIME: - schema_ele.converted_type = ConvertedType::TIME_MICROS; - schema_ele.__isset.converted_type = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.TIME = true; - schema_ele.logicalType.TIME.isAdjustedToUTC = (duckdb_type.id() == LogicalTypeId::TIME_TZ); - schema_ele.logicalType.TIME.unit.__isset.MICROS = true; - break; - case LogicalTypeId::TIMESTAMP_TZ: - case LogicalTypeId::TIMESTAMP: - case LogicalTypeId::TIMESTAMP_NS: - case LogicalTypeId::TIMESTAMP_SEC: - schema_ele.converted_type = ConvertedType::TIMESTAMP_MICROS; - schema_ele.__isset.converted_type = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.TIMESTAMP = true; - schema_ele.logicalType.TIMESTAMP.isAdjustedToUTC = (duckdb_type.id() == LogicalTypeId::TIMESTAMP_TZ); - schema_ele.logicalType.TIMESTAMP.unit.__isset.MICROS = true; - break; - case LogicalTypeId::TIMESTAMP_MS: - schema_ele.converted_type = ConvertedType::TIMESTAMP_MILLIS; - schema_ele.__isset.converted_type = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.TIMESTAMP = true; - schema_ele.logicalType.TIMESTAMP.isAdjustedToUTC = false; - schema_ele.logicalType.TIMESTAMP.unit.__isset.MILLIS = true; - break; - case LogicalTypeId::ENUM: - case LogicalTypeId::VARCHAR: - schema_ele.converted_type = ConvertedType::UTF8; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::INTERVAL: - schema_ele.type_length = 12; - schema_ele.converted_type = ConvertedType::INTERVAL; - schema_ele.__isset.type_length = true; - schema_ele.__isset.converted_type = true; - break; - case LogicalTypeId::UUID: - schema_ele.type_length = 16; - schema_ele.__isset.type_length = true; - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.UUID = true; - break; - case LogicalTypeId::DECIMAL: - schema_ele.converted_type = ConvertedType::DECIMAL; - schema_ele.precision = DecimalType::GetWidth(duckdb_type); - schema_ele.scale = DecimalType::GetScale(duckdb_type); - schema_ele.__isset.converted_type = true; - schema_ele.__isset.precision = true; - schema_ele.__isset.scale = true; - if (duckdb_type.InternalType() == PhysicalType::INT128) { - schema_ele.type_length = 16; - schema_ele.__isset.type_length = true; - } - schema_ele.__isset.logicalType = true; - schema_ele.logicalType.__isset.DECIMAL = true; - schema_ele.logicalType.DECIMAL.precision = schema_ele.precision; - schema_ele.logicalType.DECIMAL.scale = schema_ele.scale; - break; - default: - break; - } -} - -void VerifyUniqueNames(const vector &names) { -#ifdef DEBUG - unordered_set name_set; - name_set.reserve(names.size()); - for (auto &column : names) { - auto res = name_set.insert(column); - D_ASSERT(res.second == true); - } - // If there would be duplicates, these sizes would differ - D_ASSERT(name_set.size() == names.size()); -#endif -} - -ParquetWriter::ParquetWriter(FileSystem &fs, string file_name_p, vector types_p, vector names_p, - CompressionCodec::type codec, ChildFieldIDs field_ids_p) - : file_name(std::move(file_name_p)), sql_types(std::move(types_p)), column_names(std::move(names_p)), codec(codec), - field_ids(std::move(field_ids_p)) { - // initialize the file writer - writer = make_uniq(fs, file_name.c_str(), - FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW); - // parquet files start with the string "PAR1" - writer->WriteData(const_data_ptr_cast("PAR1"), 4); - TCompactProtocolFactoryT tproto_factory; - protocol = tproto_factory.getProtocol(make_shared(*writer)); - - file_meta_data.num_rows = 0; - file_meta_data.version = 1; - - file_meta_data.__isset.created_by = true; - file_meta_data.created_by = "DuckDB"; - - file_meta_data.schema.resize(1); - - // populate root schema object - file_meta_data.schema[0].name = "duckdb_schema"; - file_meta_data.schema[0].num_children = sql_types.size(); - file_meta_data.schema[0].__isset.num_children = true; - file_meta_data.schema[0].repetition_type = duckdb_parquet::format::FieldRepetitionType::REQUIRED; - file_meta_data.schema[0].__isset.repetition_type = true; - - auto &unique_names = column_names; - VerifyUniqueNames(unique_names); - - vector schema_path; - for (idx_t i = 0; i < sql_types.size(); i++) { - column_writers.push_back(ColumnWriter::CreateWriterRecursive(file_meta_data.schema, *this, sql_types[i], - unique_names[i], schema_path, &field_ids)); - } -} - -void ParquetWriter::PrepareRowGroup(ColumnDataCollection &buffer, PreparedRowGroup &result) { - // We write 8 columns at a time so that iterating over ColumnDataCollection is more efficient - static constexpr idx_t COLUMNS_PER_PASS = 8; - - // We want these to be in-memory/hybrid so we don't have to copy over strings to the dictionary - D_ASSERT(buffer.GetAllocatorType() == ColumnDataAllocatorType::IN_MEMORY_ALLOCATOR || - buffer.GetAllocatorType() == ColumnDataAllocatorType::HYBRID); - - // set up a new row group for this chunk collection - auto &row_group = result.row_group; - row_group.num_rows = buffer.Count(); - row_group.total_byte_size = buffer.SizeInBytes(); - row_group.__isset.file_offset = true; - - auto &states = result.states; - // iterate over each of the columns of the chunk collection and write them - D_ASSERT(buffer.ColumnCount() == column_writers.size()); - for (idx_t col_idx = 0; col_idx < buffer.ColumnCount(); col_idx += COLUMNS_PER_PASS) { - const auto next = MinValue(buffer.ColumnCount() - col_idx, COLUMNS_PER_PASS); - vector column_ids; - vector> col_writers; - vector> write_states; - for (idx_t i = 0; i < next; i++) { - column_ids.emplace_back(col_idx + i); - col_writers.emplace_back(*column_writers[column_ids.back()]); - write_states.emplace_back(col_writers.back().get().InitializeWriteState(row_group)); - } - - for (auto &chunk : buffer.Chunks({column_ids})) { - for (idx_t i = 0; i < next; i++) { - if (col_writers[i].get().HasAnalyze()) { - col_writers[i].get().Analyze(*write_states[i], nullptr, chunk.data[i], chunk.size()); - } - } - } - - for (idx_t i = 0; i < next; i++) { - if (col_writers[i].get().HasAnalyze()) { - col_writers[i].get().FinalizeAnalyze(*write_states[i]); - } - } - - for (auto &chunk : buffer.Chunks({column_ids})) { - for (idx_t i = 0; i < next; i++) { - col_writers[i].get().Prepare(*write_states[i], nullptr, chunk.data[i], chunk.size()); - } - } - - for (idx_t i = 0; i < next; i++) { - col_writers[i].get().BeginWrite(*write_states[i]); - } - - for (auto &chunk : buffer.Chunks({column_ids})) { - for (idx_t i = 0; i < next; i++) { - col_writers[i].get().Write(*write_states[i], chunk.data[i], chunk.size()); - } - } - - for (auto &write_state : write_states) { - states.push_back(std::move(write_state)); - } - } - result.heaps = buffer.GetHeapReferences(); -} - -void ParquetWriter::FlushRowGroup(PreparedRowGroup &prepared) { - lock_guard glock(lock); - auto &row_group = prepared.row_group; - auto &states = prepared.states; - if (states.empty()) { - throw InternalException("Attempting to flush a row group with no rows"); - } - row_group.file_offset = writer->GetTotalWritten(); - for (idx_t col_idx = 0; col_idx < states.size(); col_idx++) { - const auto &col_writer = column_writers[col_idx]; - auto write_state = std::move(states[col_idx]); - col_writer->FinalizeWrite(*write_state); - } - - // append the row group to the file meta data - file_meta_data.row_groups.push_back(row_group); - file_meta_data.num_rows += row_group.num_rows; - - prepared.heaps.clear(); -} - -void ParquetWriter::Flush(ColumnDataCollection &buffer) { - if (buffer.Count() == 0) { - return; - } - - PreparedRowGroup prepared_row_group; - PrepareRowGroup(buffer, prepared_row_group); - buffer.Reset(); - - FlushRowGroup(prepared_row_group); -} - -void ParquetWriter::Finalize() { - auto start_offset = writer->GetTotalWritten(); - file_meta_data.write(protocol.get()); - - writer->Write(writer->GetTotalWritten() - start_offset); - - // parquet files also end with the string "PAR1" - writer->WriteData(const_data_ptr_cast("PAR1"), 4); - - // flush to disk - writer->Sync(); - writer.reset(); -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/serialize_parquet.cpp b/src/duckdb/extension/parquet/serialize_parquet.cpp deleted file mode 100644 index 030683dc6..000000000 --- a/src/duckdb/extension/parquet/serialize_parquet.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===----------------------------------------------------------------------===// -// This file is automatically generated by scripts/generate_serialization.py -// Do not edit this file manually, your changes will be overwritten -//===----------------------------------------------------------------------===// - -#include "duckdb/common/serializer/format_serializer.hpp" -#include "duckdb/common/serializer/format_deserializer.hpp" -#include "parquet_reader.hpp" - -namespace duckdb { - -void ParquetOptions::FormatSerialize(FormatSerializer &serializer) const { - serializer.WriteProperty(100, "binary_as_string", binary_as_string); - serializer.WriteProperty(101, "file_row_number", file_row_number); - serializer.WriteProperty(102, "file_options", file_options); -} - -ParquetOptions ParquetOptions::FormatDeserialize(FormatDeserializer &deserializer) { - ParquetOptions result; - deserializer.ReadProperty(100, "binary_as_string", result.binary_as_string); - deserializer.ReadProperty(101, "file_row_number", result.file_row_number); - deserializer.ReadProperty(102, "file_options", result.file_options); - return result; -} - -} // namespace duckdb diff --git a/src/duckdb/extension/parquet/zstd_file_system.cpp b/src/duckdb/extension/parquet/zstd_file_system.cpp deleted file mode 100644 index 08a477cf7..000000000 --- a/src/duckdb/extension/parquet/zstd_file_system.cpp +++ /dev/null @@ -1,183 +0,0 @@ -#include "zstd_file_system.hpp" -#include "zstd.h" - -namespace duckdb { - -struct ZstdStreamWrapper : public StreamWrapper { - ~ZstdStreamWrapper() override; - - CompressedFile *file = nullptr; - duckdb_zstd::ZSTD_DStream *zstd_stream_ptr = nullptr; - duckdb_zstd::ZSTD_CStream *zstd_compress_ptr = nullptr; - bool writing = false; - -public: - void Initialize(CompressedFile &file, bool write) override; - bool Read(StreamData &stream_data) override; - void Write(CompressedFile &file, StreamData &stream_data, data_ptr_t buffer, int64_t nr_bytes) override; - - void Close() override; - - void FlushStream(); -}; - -ZstdStreamWrapper::~ZstdStreamWrapper() { - if (Exception::UncaughtException()) { - return; - } - try { - Close(); - } catch (...) { - } -} - -void ZstdStreamWrapper::Initialize(CompressedFile &file, bool write) { - Close(); - this->file = &file; - this->writing = write; - if (write) { - zstd_compress_ptr = duckdb_zstd::ZSTD_createCStream(); - } else { - zstd_stream_ptr = duckdb_zstd::ZSTD_createDStream(); - } -} - -bool ZstdStreamWrapper::Read(StreamData &sd) { - D_ASSERT(!writing); - - duckdb_zstd::ZSTD_inBuffer in_buffer; - duckdb_zstd::ZSTD_outBuffer out_buffer; - - in_buffer.src = sd.in_buff_start; - in_buffer.size = sd.in_buff_end - sd.in_buff_start; - in_buffer.pos = 0; - - out_buffer.dst = sd.out_buff_start; - out_buffer.size = sd.out_buf_size; - out_buffer.pos = 0; - - auto res = duckdb_zstd::ZSTD_decompressStream(zstd_stream_ptr, &out_buffer, &in_buffer); - if (duckdb_zstd::ZSTD_isError(res)) { - throw IOException(duckdb_zstd::ZSTD_getErrorName(res)); - } - - sd.in_buff_start = (data_ptr_t)in_buffer.src + in_buffer.pos; // NOLINT - sd.in_buff_end = (data_ptr_t)in_buffer.src + in_buffer.size; // NOLINT - sd.out_buff_end = (data_ptr_t)out_buffer.dst + out_buffer.pos; // NOLINT - return false; -} - -void ZstdStreamWrapper::Write(CompressedFile &file, StreamData &sd, data_ptr_t uncompressed_data, - int64_t uncompressed_size) { - D_ASSERT(writing); - - auto remaining = uncompressed_size; - while (remaining > 0) { - D_ASSERT(sd.out_buff.get() + sd.out_buf_size > sd.out_buff_start); - idx_t output_remaining = (sd.out_buff.get() + sd.out_buf_size) - sd.out_buff_start; - - duckdb_zstd::ZSTD_inBuffer in_buffer; - duckdb_zstd::ZSTD_outBuffer out_buffer; - - in_buffer.src = uncompressed_data; - in_buffer.size = remaining; - in_buffer.pos = 0; - - out_buffer.dst = sd.out_buff_start; - out_buffer.size = output_remaining; - out_buffer.pos = 0; - auto res = - duckdb_zstd::ZSTD_compressStream2(zstd_compress_ptr, &out_buffer, &in_buffer, duckdb_zstd::ZSTD_e_continue); - if (duckdb_zstd::ZSTD_isError(res)) { - throw IOException(duckdb_zstd::ZSTD_getErrorName(res)); - } - idx_t input_consumed = in_buffer.pos; - idx_t written_to_output = out_buffer.pos; - sd.out_buff_start += written_to_output; - if (sd.out_buff_start == sd.out_buff.get() + sd.out_buf_size) { - // no more output buffer available: flush - file.child_handle->Write(sd.out_buff.get(), sd.out_buff_start - sd.out_buff.get()); - sd.out_buff_start = sd.out_buff.get(); - } - uncompressed_data += input_consumed; - remaining -= input_consumed; - } -} - -void ZstdStreamWrapper::FlushStream() { - auto &sd = file->stream_data; - duckdb_zstd::ZSTD_inBuffer in_buffer; - duckdb_zstd::ZSTD_outBuffer out_buffer; - - in_buffer.src = nullptr; - in_buffer.size = 0; - in_buffer.pos = 0; - while (true) { - idx_t output_remaining = (sd.out_buff.get() + sd.out_buf_size) - sd.out_buff_start; - - out_buffer.dst = sd.out_buff_start; - out_buffer.size = output_remaining; - out_buffer.pos = 0; - - auto res = - duckdb_zstd::ZSTD_compressStream2(zstd_compress_ptr, &out_buffer, &in_buffer, duckdb_zstd::ZSTD_e_end); - if (duckdb_zstd::ZSTD_isError(res)) { - throw IOException(duckdb_zstd::ZSTD_getErrorName(res)); - } - idx_t written_to_output = out_buffer.pos; - sd.out_buff_start += written_to_output; - if (sd.out_buff_start > sd.out_buff.get()) { - file->child_handle->Write(sd.out_buff.get(), sd.out_buff_start - sd.out_buff.get()); - sd.out_buff_start = sd.out_buff.get(); - } - if (res == 0) { - break; - } - } -} - -void ZstdStreamWrapper::Close() { - if (!zstd_stream_ptr && !zstd_compress_ptr) { - return; - } - if (writing) { - FlushStream(); - } - if (zstd_stream_ptr) { - duckdb_zstd::ZSTD_freeDStream(zstd_stream_ptr); - } - if (zstd_compress_ptr) { - duckdb_zstd::ZSTD_freeCStream(zstd_compress_ptr); - } - zstd_stream_ptr = nullptr; - zstd_compress_ptr = nullptr; -} - -class ZStdFile : public CompressedFile { -public: - ZStdFile(unique_ptr child_handle_p, const string &path, bool write) - : CompressedFile(zstd_fs, std::move(child_handle_p), path) { - Initialize(write); - } - - ZStdFileSystem zstd_fs; -}; - -unique_ptr ZStdFileSystem::OpenCompressedFile(unique_ptr handle, bool write) { - auto path = handle->path; - return make_uniq(std::move(handle), path, write); -} - -unique_ptr ZStdFileSystem::CreateStream() { - return make_uniq(); -} - -idx_t ZStdFileSystem::InBufferSize() { - return duckdb_zstd::ZSTD_DStreamInSize(); -} - -idx_t ZStdFileSystem::OutBufferSize() { - return duckdb_zstd::ZSTD_DStreamOutSize(); -} - -} // namespace duckdb diff --git a/src/duckdb/third_party/parquet/parquet_constants.cpp b/src/duckdb/third_party/parquet/parquet_constants.cpp deleted file mode 100644 index de4420ba0..000000000 --- a/src/duckdb/third_party/parquet/parquet_constants.cpp +++ /dev/null @@ -1,17 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#include "parquet_constants.h" - -namespace duckdb_parquet { namespace format { - -const parquetConstants g_parquet_constants; - -parquetConstants::parquetConstants() { -} - -}} // namespace - diff --git a/src/duckdb/third_party/parquet/parquet_constants.h b/src/duckdb/third_party/parquet/parquet_constants.h deleted file mode 100644 index 468309ce8..000000000 --- a/src/duckdb/third_party/parquet/parquet_constants.h +++ /dev/null @@ -1,24 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#ifndef parquet_CONSTANTS_H -#define parquet_CONSTANTS_H - -#include "parquet_types.h" - -namespace duckdb_parquet { namespace format { - -class parquetConstants { - public: - parquetConstants(); - -}; - -extern const parquetConstants g_parquet_constants; - -}} // namespace - -#endif diff --git a/src/duckdb/third_party/parquet/parquet_types.cpp b/src/duckdb/third_party/parquet/parquet_types.cpp deleted file mode 100644 index daa065bcd..000000000 --- a/src/duckdb/third_party/parquet/parquet_types.cpp +++ /dev/null @@ -1,6678 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#include "parquet_types.h" - -#include -#include - -#include "thrift/TToString.h" - -namespace duckdb_parquet { namespace format { - -int _kTypeValues[] = { - Type::BOOLEAN, - Type::INT32, - Type::INT64, - Type::INT96, - Type::FLOAT, - Type::DOUBLE, - Type::BYTE_ARRAY, - Type::FIXED_LEN_BYTE_ARRAY -}; -const char* _kTypeNames[] = { - "BOOLEAN", - "INT32", - "INT64", - "INT96", - "FLOAT", - "DOUBLE", - "BYTE_ARRAY", - "FIXED_LEN_BYTE_ARRAY" -}; -const std::map _Type_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kTypeValues, _kTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const Type::type& val) { - std::map::const_iterator it = _Type_VALUES_TO_NAMES.find(val); - if (it != _Type_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kConvertedTypeValues[] = { - ConvertedType::UTF8, - ConvertedType::MAP, - ConvertedType::MAP_KEY_VALUE, - ConvertedType::LIST, - ConvertedType::ENUM, - ConvertedType::DECIMAL, - ConvertedType::DATE, - ConvertedType::TIME_MILLIS, - ConvertedType::TIME_MICROS, - ConvertedType::TIMESTAMP_MILLIS, - ConvertedType::TIMESTAMP_MICROS, - ConvertedType::UINT_8, - ConvertedType::UINT_16, - ConvertedType::UINT_32, - ConvertedType::UINT_64, - ConvertedType::INT_8, - ConvertedType::INT_16, - ConvertedType::INT_32, - ConvertedType::INT_64, - ConvertedType::JSON, - ConvertedType::BSON, - ConvertedType::INTERVAL -}; -const char* _kConvertedTypeNames[] = { - "UTF8", - "MAP", - "MAP_KEY_VALUE", - "LIST", - "ENUM", - "DECIMAL", - "DATE", - "TIME_MILLIS", - "TIME_MICROS", - "TIMESTAMP_MILLIS", - "TIMESTAMP_MICROS", - "UINT_8", - "UINT_16", - "UINT_32", - "UINT_64", - "INT_8", - "INT_16", - "INT_32", - "INT_64", - "JSON", - "BSON", - "INTERVAL" -}; -const std::map _ConvertedType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(22, _kConvertedTypeValues, _kConvertedTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val) { - std::map::const_iterator it = _ConvertedType_VALUES_TO_NAMES.find(val); - if (it != _ConvertedType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kFieldRepetitionTypeValues[] = { - FieldRepetitionType::REQUIRED, - FieldRepetitionType::OPTIONAL, - FieldRepetitionType::REPEATED -}; -const char* _kFieldRepetitionTypeNames[] = { - "REQUIRED", - "OPTIONAL", - "REPEATED" -}; -const std::map _FieldRepetitionType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kFieldRepetitionTypeValues, _kFieldRepetitionTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val) { - std::map::const_iterator it = _FieldRepetitionType_VALUES_TO_NAMES.find(val); - if (it != _FieldRepetitionType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kEncodingValues[] = { - Encoding::PLAIN, - Encoding::PLAIN_DICTIONARY, - Encoding::RLE, - Encoding::BIT_PACKED, - Encoding::DELTA_BINARY_PACKED, - Encoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DELTA_BYTE_ARRAY, - Encoding::RLE_DICTIONARY -}; -const char* _kEncodingNames[] = { - "PLAIN", - "PLAIN_DICTIONARY", - "RLE", - "BIT_PACKED", - "DELTA_BINARY_PACKED", - "DELTA_LENGTH_BYTE_ARRAY", - "DELTA_BYTE_ARRAY", - "RLE_DICTIONARY" -}; -const std::map _Encoding_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(8, _kEncodingValues, _kEncodingNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const Encoding::type& val) { - std::map::const_iterator it = _Encoding_VALUES_TO_NAMES.find(val); - if (it != _Encoding_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kCompressionCodecValues[] = { - CompressionCodec::UNCOMPRESSED, - CompressionCodec::SNAPPY, - CompressionCodec::GZIP, - CompressionCodec::LZO, - CompressionCodec::BROTLI, - CompressionCodec::LZ4, - CompressionCodec::ZSTD -}; -const char* _kCompressionCodecNames[] = { - "UNCOMPRESSED", - "SNAPPY", - "GZIP", - "LZO", - "BROTLI", - "LZ4", - "ZSTD" -}; -const std::map _CompressionCodec_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(7, _kCompressionCodecValues, _kCompressionCodecNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val) { - std::map::const_iterator it = _CompressionCodec_VALUES_TO_NAMES.find(val); - if (it != _CompressionCodec_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kPageTypeValues[] = { - PageType::DATA_PAGE, - PageType::INDEX_PAGE, - PageType::DICTIONARY_PAGE, - PageType::DATA_PAGE_V2 -}; -const char* _kPageTypeNames[] = { - "DATA_PAGE", - "INDEX_PAGE", - "DICTIONARY_PAGE", - "DATA_PAGE_V2" -}; -const std::map _PageType_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(4, _kPageTypeValues, _kPageTypeNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const PageType::type& val) { - std::map::const_iterator it = _PageType_VALUES_TO_NAMES.find(val); - if (it != _PageType_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - -int _kBoundaryOrderValues[] = { - BoundaryOrder::UNORDERED, - BoundaryOrder::ASCENDING, - BoundaryOrder::DESCENDING -}; -const char* _kBoundaryOrderNames[] = { - "UNORDERED", - "ASCENDING", - "DESCENDING" -}; -const std::map _BoundaryOrder_VALUES_TO_NAMES(::duckdb_apache::thrift::TEnumIterator(3, _kBoundaryOrderValues, _kBoundaryOrderNames), ::duckdb_apache::thrift::TEnumIterator(-1, NULL, NULL)); - -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val) { - std::map::const_iterator it = _BoundaryOrder_VALUES_TO_NAMES.find(val); - if (it != _BoundaryOrder_VALUES_TO_NAMES.end()) { - out << it->second; - } else { - out << static_cast(val); - } - return out; -} - - -Statistics::~Statistics() throw() { -} - - -void Statistics::__set_max(const std::string& val) { - this->max = val; -__isset.max = true; -} - -void Statistics::__set_min(const std::string& val) { - this->min = val; -__isset.min = true; -} - -void Statistics::__set_null_count(const int64_t val) { - this->null_count = val; -__isset.null_count = true; -} - -void Statistics::__set_distinct_count(const int64_t val) { - this->distinct_count = val; -__isset.distinct_count = true; -} - -void Statistics::__set_max_value(const std::string& val) { - this->max_value = val; -__isset.max_value = true; -} - -void Statistics::__set_min_value(const std::string& val) { - this->min_value = val; -__isset.min_value = true; -} -std::ostream& operator<<(std::ostream& out, const Statistics& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t Statistics::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - this->__isset.min_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Statistics::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); - - if (this->__isset.max) { - xfer += oprot->writeFieldBegin("max", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += oprot->writeFieldBegin("min", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin("null_count", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin("distinct_count", ::duckdb_apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin("max_value", ::duckdb_apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin("min_value", ::duckdb_apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(Statistics &a, Statistics &b) { - using ::std::swap; - swap(a.max, b.max); - swap(a.min, b.min); - swap(a.null_count, b.null_count); - swap(a.distinct_count, b.distinct_count); - swap(a.max_value, b.max_value); - swap(a.min_value, b.min_value); - swap(a.__isset, b.__isset); -} - -Statistics::Statistics(const Statistics& other0) { - max = other0.max; - min = other0.min; - null_count = other0.null_count; - distinct_count = other0.distinct_count; - max_value = other0.max_value; - min_value = other0.min_value; - __isset = other0.__isset; -} -Statistics& Statistics::operator=(const Statistics& other1) { - max = other1.max; - min = other1.min; - null_count = other1.null_count; - distinct_count = other1.distinct_count; - max_value = other1.max_value; - min_value = other1.min_value; - __isset = other1.__isset; - return *this; -} -void Statistics::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "Statistics("; - out << "max="; (__isset.max ? (out << to_string(max)) : (out << "")); - out << ", " << "min="; (__isset.min ? (out << to_string(min)) : (out << "")); - out << ", " << "null_count="; (__isset.null_count ? (out << to_string(null_count)) : (out << "")); - out << ", " << "distinct_count="; (__isset.distinct_count ? (out << to_string(distinct_count)) : (out << "")); - out << ", " << "max_value="; (__isset.max_value ? (out << to_string(max_value)) : (out << "")); - out << ", " << "min_value="; (__isset.min_value ? (out << to_string(min_value)) : (out << "")); - out << ")"; -} - - -StringType::~StringType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const StringType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t StringType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t StringType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(StringType &a, StringType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -StringType::StringType(const StringType& other2) { - (void) other2; -} -StringType& StringType::operator=(const StringType& other3) { - (void) other3; - return *this; -} -void StringType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "StringType("; - out << ")"; -} - - -UUIDType::~UUIDType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const UUIDType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t UUIDType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t UUIDType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(UUIDType &a, UUIDType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -UUIDType::UUIDType(const UUIDType& other4) { - (void) other4; -} -UUIDType& UUIDType::operator=(const UUIDType& other5) { - (void) other5; - return *this; -} -void UUIDType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "UUIDType("; - out << ")"; -} - - -MapType::~MapType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const MapType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t MapType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MapType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MapType &a, MapType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -MapType::MapType(const MapType& other6) { - (void) other6; -} -MapType& MapType::operator=(const MapType& other7) { - (void) other7; - return *this; -} -void MapType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MapType("; - out << ")"; -} - - -ListType::~ListType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const ListType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ListType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ListType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ListType &a, ListType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -ListType::ListType(const ListType& other8) { - (void) other8; -} -ListType& ListType::operator=(const ListType& other9) { - (void) other9; - return *this; -} -void ListType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ListType("; - out << ")"; -} - - -EnumType::~EnumType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const EnumType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EnumType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EnumType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EnumType &a, EnumType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -EnumType::EnumType(const EnumType& other10) { - (void) other10; -} -EnumType& EnumType::operator=(const EnumType& other11) { - (void) other11; - return *this; -} -void EnumType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EnumType("; - out << ")"; -} - - -DateType::~DateType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const DateType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DateType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t DateType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DateType &a, DateType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -DateType::DateType(const DateType& other12) { - (void) other12; -} -DateType& DateType::operator=(const DateType& other13) { - (void) other13; - return *this; -} -void DateType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DateType("; - out << ")"; -} - - -NullType::~NullType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const NullType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t NullType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NullType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(NullType &a, NullType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -NullType::NullType(const NullType& other14) { - (void) other14; -} -NullType& NullType::operator=(const NullType& other15) { - (void) other15; - return *this; -} -void NullType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "NullType("; - out << ")"; -} - - -DecimalType::~DecimalType() throw() { -} - - -void DecimalType::__set_scale(const int32_t val) { - this->scale = val; -} - -void DecimalType::__set_precision(const int32_t val) { - this->precision = val; -} -std::ostream& operator<<(std::ostream& out, const DecimalType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DecimalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_scale = false; - bool isset_precision = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DecimalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DecimalType &a, DecimalType &b) { - using ::std::swap; - swap(a.scale, b.scale); - swap(a.precision, b.precision); -} - -DecimalType::DecimalType(const DecimalType& other16) { - scale = other16.scale; - precision = other16.precision; -} -DecimalType& DecimalType::operator=(const DecimalType& other17) { - scale = other17.scale; - precision = other17.precision; - return *this; -} -void DecimalType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DecimalType("; - out << "scale=" << to_string(scale); - out << ", " << "precision=" << to_string(precision); - out << ")"; -} - - -MilliSeconds::~MilliSeconds() throw() { -} - -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t MilliSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MilliSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MilliSeconds &a, MilliSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} - -MilliSeconds::MilliSeconds(const MilliSeconds& other18) { - (void) other18; -} -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other19) { - (void) other19; - return *this; -} -void MilliSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MilliSeconds("; - out << ")"; -} - - -MicroSeconds::~MicroSeconds() throw() { -} - -std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t MicroSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MicroSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(MicroSeconds &a, MicroSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} - -MicroSeconds::MicroSeconds(const MicroSeconds& other20) { - (void) other20; -} -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other21) { - (void) other21; - return *this; -} -void MicroSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "MicroSeconds("; - out << ")"; -} - - -NanoSeconds::~NanoSeconds() throw() { -} - -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t NanoSeconds::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NanoSeconds::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NanoSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(NanoSeconds &a, NanoSeconds &b) { - using ::std::swap; - (void) a; - (void) b; -} - -NanoSeconds::NanoSeconds(const NanoSeconds& other22) { - (void) other22; -} -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other23) { - (void) other23; - return *this; -} -void NanoSeconds::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "NanoSeconds("; - out << ")"; -} - - -TimeUnit::~TimeUnit() throw() { -} - - -void TimeUnit::__set_MILLIS(const MilliSeconds& val) { - this->MILLIS = val; -__isset.MILLIS = true; -} - -void TimeUnit::__set_MICROS(const MicroSeconds& val) { - this->MICROS = val; -__isset.MICROS = true; -} - -void TimeUnit::__set_NANOS(const NanoSeconds& val) { - this->NANOS = val; -__isset.NANOS = true; -} -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TimeUnit::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TimeUnit::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin("MILLIS", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin("MICROS", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin("NANOS", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimeUnit &a, TimeUnit &b) { - using ::std::swap; - swap(a.MILLIS, b.MILLIS); - swap(a.MICROS, b.MICROS); - swap(a.NANOS, b.NANOS); - swap(a.__isset, b.__isset); -} - -TimeUnit::TimeUnit(const TimeUnit& other24) { - MILLIS = other24.MILLIS; - MICROS = other24.MICROS; - NANOS = other24.NANOS; - __isset = other24.__isset; -} -TimeUnit& TimeUnit::operator=(const TimeUnit& other25) { - MILLIS = other25.MILLIS; - MICROS = other25.MICROS; - NANOS = other25.NANOS; - __isset = other25.__isset; - return *this; -} -void TimeUnit::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimeUnit("; - out << "MILLIS="; (__isset.MILLIS ? (out << to_string(MILLIS)) : (out << "")); - out << ", " << "MICROS="; (__isset.MICROS ? (out << to_string(MICROS)) : (out << "")); - out << ", " << "NANOS="; (__isset.NANOS ? (out << to_string(NANOS)) : (out << "")); - out << ")"; -} - - -TimestampType::~TimestampType() throw() { -} - - -void TimestampType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} - -void TimestampType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimestampType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TimestampType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimestampType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimestampType &a, TimestampType &b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} - -TimestampType::TimestampType(const TimestampType& other26) { - isAdjustedToUTC = other26.isAdjustedToUTC; - unit = other26.unit; -} -TimestampType& TimestampType::operator=(const TimestampType& other27) { - isAdjustedToUTC = other27.isAdjustedToUTC; - unit = other27.unit; - return *this; -} -void TimestampType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimestampType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} - - -TimeType::~TimeType() throw() { -} - - -void TimeType::__set_isAdjustedToUTC(const bool val) { - this->isAdjustedToUTC = val; -} - -void TimeType::__set_unit(const TimeUnit& val) { - this->unit = val; -} -std::ostream& operator<<(std::ostream& out, const TimeType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TimeType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimeType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::duckdb_apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TimeType &a, TimeType &b) { - using ::std::swap; - swap(a.isAdjustedToUTC, b.isAdjustedToUTC); - swap(a.unit, b.unit); -} - -TimeType::TimeType(const TimeType& other28) { - isAdjustedToUTC = other28.isAdjustedToUTC; - unit = other28.unit; -} -TimeType& TimeType::operator=(const TimeType& other29) { - isAdjustedToUTC = other29.isAdjustedToUTC; - unit = other29.unit; - return *this; -} -void TimeType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TimeType("; - out << "isAdjustedToUTC=" << to_string(isAdjustedToUTC); - out << ", " << "unit=" << to_string(unit); - out << ")"; -} - - -IntType::~IntType() throw() { -} - - -void IntType::__set_bitWidth(const int8_t val) { - this->bitWidth = val; -} - -void IntType::__set_isSigned(const bool val) { - this->isSigned = val; -} -std::ostream& operator<<(std::ostream& out, const IntType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t IntType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_bitWidth = false; - bool isset_isSigned = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t IntType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IntType"); - - xfer += oprot->writeFieldBegin("bitWidth", ::duckdb_apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("isSigned", ::duckdb_apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(IntType &a, IntType &b) { - using ::std::swap; - swap(a.bitWidth, b.bitWidth); - swap(a.isSigned, b.isSigned); -} - -IntType::IntType(const IntType& other30) { - bitWidth = other30.bitWidth; - isSigned = other30.isSigned; -} -IntType& IntType::operator=(const IntType& other31) { - bitWidth = other31.bitWidth; - isSigned = other31.isSigned; - return *this; -} -void IntType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "IntType("; - out << "bitWidth=" << to_string(bitWidth); - out << ", " << "isSigned=" << to_string(isSigned); - out << ")"; -} - - -JsonType::~JsonType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const JsonType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t JsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t JsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(JsonType &a, JsonType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -JsonType::JsonType(const JsonType& other32) { - (void) other32; -} -JsonType& JsonType::operator=(const JsonType& other33) { - (void) other33; - return *this; -} -void JsonType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "JsonType("; - out << ")"; -} - - -BsonType::~BsonType() throw() { -} - -std::ostream& operator<<(std::ostream& out, const BsonType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t BsonType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BsonType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(BsonType &a, BsonType &b) { - using ::std::swap; - (void) a; - (void) b; -} - -BsonType::BsonType(const BsonType& other34) { - (void) other34; -} -BsonType& BsonType::operator=(const BsonType& other35) { - (void) other35; - return *this; -} -void BsonType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "BsonType("; - out << ")"; -} - - -LogicalType::~LogicalType() throw() { -} - - -void LogicalType::__set_STRING(const StringType& val) { - this->STRING = val; -__isset.STRING = true; -} - -void LogicalType::__set_MAP(const MapType& val) { - this->MAP = val; -__isset.MAP = true; -} - -void LogicalType::__set_LIST(const ListType& val) { - this->LIST = val; -__isset.LIST = true; -} - -void LogicalType::__set_ENUM(const EnumType& val) { - this->ENUM = val; -__isset.ENUM = true; -} - -void LogicalType::__set_DECIMAL(const DecimalType& val) { - this->DECIMAL = val; -__isset.DECIMAL = true; -} - -void LogicalType::__set_DATE(const DateType& val) { - this->DATE = val; -__isset.DATE = true; -} - -void LogicalType::__set_TIME(const TimeType& val) { - this->TIME = val; -__isset.TIME = true; -} - -void LogicalType::__set_TIMESTAMP(const TimestampType& val) { - this->TIMESTAMP = val; -__isset.TIMESTAMP = true; -} - -void LogicalType::__set_INTEGER(const IntType& val) { - this->INTEGER = val; -__isset.INTEGER = true; -} - -void LogicalType::__set_UNKNOWN(const NullType& val) { - this->UNKNOWN = val; -__isset.UNKNOWN = true; -} - -void LogicalType::__set_JSON(const JsonType& val) { - this->JSON = val; -__isset.JSON = true; -} - -void LogicalType::__set_BSON(const BsonType& val) { - this->BSON = val; -__isset.BSON = true; -} - -void LogicalType::__set_UUID(const UUIDType& val) { - this->UUID = val; -__isset.UUID = true; -} -std::ostream& operator<<(std::ostream& out, const LogicalType& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t LogicalType::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->MAP.read(iprot); - this->__isset.MAP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); - this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t LogicalType::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); - - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin("STRING", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += oprot->writeFieldBegin("MAP", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += oprot->writeFieldBegin("LIST", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += oprot->writeFieldBegin("ENUM", ::duckdb_apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += oprot->writeFieldBegin("DECIMAL", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += oprot->writeFieldBegin("DATE", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += oprot->writeFieldBegin("TIME", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin("TIMESTAMP", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin("INTEGER", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin("UNKNOWN", ::duckdb_apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin("JSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin("BSON", ::duckdb_apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin("UUID", ::duckdb_apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(LogicalType &a, LogicalType &b) { - using ::std::swap; - swap(a.STRING, b.STRING); - swap(a.MAP, b.MAP); - swap(a.LIST, b.LIST); - swap(a.ENUM, b.ENUM); - swap(a.DECIMAL, b.DECIMAL); - swap(a.DATE, b.DATE); - swap(a.TIME, b.TIME); - swap(a.TIMESTAMP, b.TIMESTAMP); - swap(a.INTEGER, b.INTEGER); - swap(a.UNKNOWN, b.UNKNOWN); - swap(a.JSON, b.JSON); - swap(a.BSON, b.BSON); - swap(a.UUID, b.UUID); - swap(a.__isset, b.__isset); -} - -LogicalType::LogicalType(const LogicalType& other36) { - STRING = other36.STRING; - MAP = other36.MAP; - LIST = other36.LIST; - ENUM = other36.ENUM; - DECIMAL = other36.DECIMAL; - DATE = other36.DATE; - TIME = other36.TIME; - TIMESTAMP = other36.TIMESTAMP; - INTEGER = other36.INTEGER; - UNKNOWN = other36.UNKNOWN; - JSON = other36.JSON; - BSON = other36.BSON; - UUID = other36.UUID; - __isset = other36.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other37) { - STRING = other37.STRING; - MAP = other37.MAP; - LIST = other37.LIST; - ENUM = other37.ENUM; - DECIMAL = other37.DECIMAL; - DATE = other37.DATE; - TIME = other37.TIME; - TIMESTAMP = other37.TIMESTAMP; - INTEGER = other37.INTEGER; - UNKNOWN = other37.UNKNOWN; - JSON = other37.JSON; - BSON = other37.BSON; - UUID = other37.UUID; - __isset = other37.__isset; - return *this; -} -void LogicalType::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "LogicalType("; - out << "STRING="; (__isset.STRING ? (out << to_string(STRING)) : (out << "")); - out << ", " << "MAP="; (__isset.MAP ? (out << to_string(MAP)) : (out << "")); - out << ", " << "LIST="; (__isset.LIST ? (out << to_string(LIST)) : (out << "")); - out << ", " << "ENUM="; (__isset.ENUM ? (out << to_string(ENUM)) : (out << "")); - out << ", " << "DECIMAL="; (__isset.DECIMAL ? (out << to_string(DECIMAL)) : (out << "")); - out << ", " << "DATE="; (__isset.DATE ? (out << to_string(DATE)) : (out << "")); - out << ", " << "TIME="; (__isset.TIME ? (out << to_string(TIME)) : (out << "")); - out << ", " << "TIMESTAMP="; (__isset.TIMESTAMP ? (out << to_string(TIMESTAMP)) : (out << "")); - out << ", " << "INTEGER="; (__isset.INTEGER ? (out << to_string(INTEGER)) : (out << "")); - out << ", " << "UNKNOWN="; (__isset.UNKNOWN ? (out << to_string(UNKNOWN)) : (out << "")); - out << ", " << "JSON="; (__isset.JSON ? (out << to_string(JSON)) : (out << "")); - out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); - out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); - out << ")"; -} - - -SchemaElement::~SchemaElement() throw() { -} - - -void SchemaElement::__set_type(const Type::type val) { - this->type = val; -__isset.type = true; -} - -void SchemaElement::__set_type_length(const int32_t val) { - this->type_length = val; -__isset.type_length = true; -} - -void SchemaElement::__set_repetition_type(const FieldRepetitionType::type val) { - this->repetition_type = val; -__isset.repetition_type = true; -} - -void SchemaElement::__set_name(const std::string& val) { - this->name = val; -} - -void SchemaElement::__set_num_children(const int32_t val) { - this->num_children = val; -__isset.num_children = true; -} - -void SchemaElement::__set_converted_type(const ConvertedType::type val) { - this->converted_type = val; -__isset.converted_type = true; -} - -void SchemaElement::__set_scale(const int32_t val) { - this->scale = val; -__isset.scale = true; -} - -void SchemaElement::__set_precision(const int32_t val) { - this->precision = val; -__isset.precision = true; -} - -void SchemaElement::__set_field_id(const int32_t val) { - this->field_id = val; -__isset.field_id = true; -} - -void SchemaElement::__set_logicalType(const LogicalType& val) { - this->logicalType = val; -__isset.logicalType = true; -} -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t SchemaElement::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_name = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast38; - xfer += iprot->readI32(ecast38); - this->type = (Type::type)ecast38; - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast39; - xfer += iprot->readI32(ecast39); - this->repetition_type = (FieldRepetitionType::type)ecast39; - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast40; - xfer += iprot->readI32(ecast40); - this->converted_type = (ConvertedType::type)ecast40; - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SchemaElement::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin("type_length", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin("repetition_type", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32((int32_t)this->repetition_type); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("name", ::duckdb_apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.num_children) { - xfer += oprot->writeFieldBegin("num_children", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin("converted_type", ::duckdb_apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32((int32_t)this->converted_type); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += oprot->writeFieldBegin("scale", ::duckdb_apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin("precision", ::duckdb_apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin("field_id", ::duckdb_apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += oprot->writeFieldBegin("logicalType", ::duckdb_apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SchemaElement &a, SchemaElement &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.type_length, b.type_length); - swap(a.repetition_type, b.repetition_type); - swap(a.name, b.name); - swap(a.num_children, b.num_children); - swap(a.converted_type, b.converted_type); - swap(a.scale, b.scale); - swap(a.precision, b.precision); - swap(a.field_id, b.field_id); - swap(a.logicalType, b.logicalType); - swap(a.__isset, b.__isset); -} - -SchemaElement::SchemaElement(const SchemaElement& other41) { - type = other41.type; - type_length = other41.type_length; - repetition_type = other41.repetition_type; - name = other41.name; - num_children = other41.num_children; - converted_type = other41.converted_type; - scale = other41.scale; - precision = other41.precision; - field_id = other41.field_id; - logicalType = other41.logicalType; - __isset = other41.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other42) { - type = other42.type; - type_length = other42.type_length; - repetition_type = other42.repetition_type; - name = other42.name; - num_children = other42.num_children; - converted_type = other42.converted_type; - scale = other42.scale; - precision = other42.precision; - field_id = other42.field_id; - logicalType = other42.logicalType; - __isset = other42.__isset; - return *this; -} -void SchemaElement::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "SchemaElement("; - out << "type="; (__isset.type ? (out << to_string(type)) : (out << "")); - out << ", " << "type_length="; (__isset.type_length ? (out << to_string(type_length)) : (out << "")); - out << ", " << "repetition_type="; (__isset.repetition_type ? (out << to_string(repetition_type)) : (out << "")); - out << ", " << "name=" << to_string(name); - out << ", " << "num_children="; (__isset.num_children ? (out << to_string(num_children)) : (out << "")); - out << ", " << "converted_type="; (__isset.converted_type ? (out << to_string(converted_type)) : (out << "")); - out << ", " << "scale="; (__isset.scale ? (out << to_string(scale)) : (out << "")); - out << ", " << "precision="; (__isset.precision ? (out << to_string(precision)) : (out << "")); - out << ", " << "field_id="; (__isset.field_id ? (out << to_string(field_id)) : (out << "")); - out << ", " << "logicalType="; (__isset.logicalType ? (out << to_string(logicalType)) : (out << "")); - out << ")"; -} - - -DataPageHeader::~DataPageHeader() throw() { -} - - -void DataPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DataPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DataPageHeader::__set_definition_level_encoding(const Encoding::type val) { - this->definition_level_encoding = val; -} - -void DataPageHeader::__set_repetition_level_encoding(const Encoding::type val) { - this->repetition_level_encoding = val; -} - -void DataPageHeader::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DataPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast43; - xfer += iprot->readI32(ecast43); - this->encoding = (Encoding::type)ecast43; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast44; - xfer += iprot->readI32(ecast44); - this->definition_level_encoding = (Encoding::type)ecast44; - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast45; - xfer += iprot->readI32(ecast45); - this->repetition_level_encoding = (Encoding::type)ecast45; - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32((int32_t)this->definition_level_encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_level_encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->repetition_level_encoding); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DataPageHeader &a, DataPageHeader &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.definition_level_encoding, b.definition_level_encoding); - swap(a.repetition_level_encoding, b.repetition_level_encoding); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} - -DataPageHeader::DataPageHeader(const DataPageHeader& other46) { - num_values = other46.num_values; - encoding = other46.encoding; - definition_level_encoding = other46.definition_level_encoding; - repetition_level_encoding = other46.repetition_level_encoding; - statistics = other46.statistics; - __isset = other46.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other47) { - num_values = other47.num_values; - encoding = other47.encoding; - definition_level_encoding = other47.definition_level_encoding; - repetition_level_encoding = other47.repetition_level_encoding; - statistics = other47.statistics; - __isset = other47.__isset; - return *this; -} -void DataPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DataPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_level_encoding=" << to_string(definition_level_encoding); - out << ", " << "repetition_level_encoding=" << to_string(repetition_level_encoding); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ")"; -} - - -IndexPageHeader::~IndexPageHeader() throw() { -} - -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t IndexPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t IndexPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(IndexPageHeader &a, IndexPageHeader &b) { - using ::std::swap; - (void) a; - (void) b; -} - -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other48) { - (void) other48; -} -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other49) { - (void) other49; - return *this; -} -void IndexPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "IndexPageHeader("; - out << ")"; -} - - -DictionaryPageHeader::~DictionaryPageHeader() throw() { -} - - -void DictionaryPageHeader::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DictionaryPageHeader::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DictionaryPageHeader::__set_is_sorted(const bool val) { - this->is_sorted = val; -__isset.is_sorted = true; -} -std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DictionaryPageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast50; - xfer += iprot->readI32(ecast50); - this->encoding = (Encoding::type)ecast50; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DictionaryPageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin("is_sorted", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.encoding, b.encoding); - swap(a.is_sorted, b.is_sorted); - swap(a.__isset, b.__isset); -} - -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other51) { - num_values = other51.num_values; - encoding = other51.encoding; - is_sorted = other51.is_sorted; - __isset = other51.__isset; -} -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other52) { - num_values = other52.num_values; - encoding = other52.encoding; - is_sorted = other52.is_sorted; - __isset = other52.__isset; - return *this; -} -void DictionaryPageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DictionaryPageHeader("; - out << "num_values=" << to_string(num_values); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "is_sorted="; (__isset.is_sorted ? (out << to_string(is_sorted)) : (out << "")); - out << ")"; -} - - -DataPageHeaderV2::~DataPageHeaderV2() throw() { -} - - -void DataPageHeaderV2::__set_num_values(const int32_t val) { - this->num_values = val; -} - -void DataPageHeaderV2::__set_num_nulls(const int32_t val) { - this->num_nulls = val; -} - -void DataPageHeaderV2::__set_num_rows(const int32_t val) { - this->num_rows = val; -} - -void DataPageHeaderV2::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void DataPageHeaderV2::__set_definition_levels_byte_length(const int32_t val) { - this->definition_levels_byte_length = val; -} - -void DataPageHeaderV2::__set_repetition_levels_byte_length(const int32_t val) { - this->repetition_levels_byte_length = val; -} - -void DataPageHeaderV2::__set_is_compressed(const bool val) { - this->is_compressed = val; -__isset.is_compressed = true; -} - -void DataPageHeaderV2::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t DataPageHeaderV2::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast53; - xfer += iprot->readI32(ecast53); - this->encoding = (Encoding::type)ecast53; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeaderV2::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_nulls", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::duckdb_apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin("is_compressed", ::duckdb_apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { - using ::std::swap; - swap(a.num_values, b.num_values); - swap(a.num_nulls, b.num_nulls); - swap(a.num_rows, b.num_rows); - swap(a.encoding, b.encoding); - swap(a.definition_levels_byte_length, b.definition_levels_byte_length); - swap(a.repetition_levels_byte_length, b.repetition_levels_byte_length); - swap(a.is_compressed, b.is_compressed); - swap(a.statistics, b.statistics); - swap(a.__isset, b.__isset); -} - -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other54) { - num_values = other54.num_values; - num_nulls = other54.num_nulls; - num_rows = other54.num_rows; - encoding = other54.encoding; - definition_levels_byte_length = other54.definition_levels_byte_length; - repetition_levels_byte_length = other54.repetition_levels_byte_length; - is_compressed = other54.is_compressed; - statistics = other54.statistics; - __isset = other54.__isset; -} -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other55) { - num_values = other55.num_values; - num_nulls = other55.num_nulls; - num_rows = other55.num_rows; - encoding = other55.encoding; - definition_levels_byte_length = other55.definition_levels_byte_length; - repetition_levels_byte_length = other55.repetition_levels_byte_length; - is_compressed = other55.is_compressed; - statistics = other55.statistics; - __isset = other55.__isset; - return *this; -} -void DataPageHeaderV2::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "DataPageHeaderV2("; - out << "num_values=" << to_string(num_values); - out << ", " << "num_nulls=" << to_string(num_nulls); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "definition_levels_byte_length=" << to_string(definition_levels_byte_length); - out << ", " << "repetition_levels_byte_length=" << to_string(repetition_levels_byte_length); - out << ", " << "is_compressed="; (__isset.is_compressed ? (out << to_string(is_compressed)) : (out << "")); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ")"; -} - - -PageHeader::~PageHeader() throw() { -} - - -void PageHeader::__set_type(const PageType::type val) { - this->type = val; -} - -void PageHeader::__set_uncompressed_page_size(const int32_t val) { - this->uncompressed_page_size = val; -} - -void PageHeader::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} - -void PageHeader::__set_crc(const int32_t val) { - this->crc = val; -__isset.crc = true; -} - -void PageHeader::__set_data_page_header(const DataPageHeader& val) { - this->data_page_header = val; -__isset.data_page_header = true; -} - -void PageHeader::__set_index_page_header(const IndexPageHeader& val) { - this->index_page_header = val; -__isset.index_page_header = true; -} - -void PageHeader::__set_dictionary_page_header(const DictionaryPageHeader& val) { - this->dictionary_page_header = val; -__isset.dictionary_page_header = true; -} - -void PageHeader::__set_data_page_header_v2(const DataPageHeaderV2& val) { - this->data_page_header_v2 = val; -__isset.data_page_header_v2 = true; -} -std::ostream& operator<<(std::ostream& out, const PageHeader& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t PageHeader::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast56; - xfer += iprot->readI32(ecast56); - this->type = (PageType::type)ecast56; - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageHeader::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); - - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("uncompressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.crc) { - xfer += oprot->writeFieldBegin("crc", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin("data_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin("index_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin("dictionary_page_header", ::duckdb_apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin("data_page_header_v2", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageHeader &a, PageHeader &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.uncompressed_page_size, b.uncompressed_page_size); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.crc, b.crc); - swap(a.data_page_header, b.data_page_header); - swap(a.index_page_header, b.index_page_header); - swap(a.dictionary_page_header, b.dictionary_page_header); - swap(a.data_page_header_v2, b.data_page_header_v2); - swap(a.__isset, b.__isset); -} - -PageHeader::PageHeader(const PageHeader& other57) { - type = other57.type; - uncompressed_page_size = other57.uncompressed_page_size; - compressed_page_size = other57.compressed_page_size; - crc = other57.crc; - data_page_header = other57.data_page_header; - index_page_header = other57.index_page_header; - dictionary_page_header = other57.dictionary_page_header; - data_page_header_v2 = other57.data_page_header_v2; - __isset = other57.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other58) { - type = other58.type; - uncompressed_page_size = other58.uncompressed_page_size; - compressed_page_size = other58.compressed_page_size; - crc = other58.crc; - data_page_header = other58.data_page_header; - index_page_header = other58.index_page_header; - dictionary_page_header = other58.dictionary_page_header; - data_page_header_v2 = other58.data_page_header_v2; - __isset = other58.__isset; - return *this; -} -void PageHeader::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageHeader("; - out << "type=" << to_string(type); - out << ", " << "uncompressed_page_size=" << to_string(uncompressed_page_size); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "crc="; (__isset.crc ? (out << to_string(crc)) : (out << "")); - out << ", " << "data_page_header="; (__isset.data_page_header ? (out << to_string(data_page_header)) : (out << "")); - out << ", " << "index_page_header="; (__isset.index_page_header ? (out << to_string(index_page_header)) : (out << "")); - out << ", " << "dictionary_page_header="; (__isset.dictionary_page_header ? (out << to_string(dictionary_page_header)) : (out << "")); - out << ", " << "data_page_header_v2="; (__isset.data_page_header_v2 ? (out << to_string(data_page_header_v2)) : (out << "")); - out << ")"; -} - - -KeyValue::~KeyValue() throw() { -} - - -void KeyValue::__set_key(const std::string& val) { - this->key = val; -} - -void KeyValue::__set_value(const std::string& val) { - this->value = val; -__isset.value = true; -} -std::ostream& operator<<(std::ostream& out, const KeyValue& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t KeyValue::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_key = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t KeyValue::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("KeyValue"); - - xfer += oprot->writeFieldBegin("key", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.value) { - xfer += oprot->writeFieldBegin("value", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(KeyValue &a, KeyValue &b) { - using ::std::swap; - swap(a.key, b.key); - swap(a.value, b.value); - swap(a.__isset, b.__isset); -} - -KeyValue::KeyValue(const KeyValue& other59) { - key = other59.key; - value = other59.value; - __isset = other59.__isset; -} -KeyValue& KeyValue::operator=(const KeyValue& other60) { - key = other60.key; - value = other60.value; - __isset = other60.__isset; - return *this; -} -void KeyValue::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "KeyValue("; - out << "key=" << to_string(key); - out << ", " << "value="; (__isset.value ? (out << to_string(value)) : (out << "")); - out << ")"; -} - - -SortingColumn::~SortingColumn() throw() { -} - - -void SortingColumn::__set_column_idx(const int32_t val) { - this->column_idx = val; -} - -void SortingColumn::__set_descending(const bool val) { - this->descending = val; -} - -void SortingColumn::__set_nulls_first(const bool val) { - this->nulls_first = val; -} -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t SortingColumn::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_column_idx = false; - bool isset_descending = false; - bool isset_nulls_first = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_idx); - isset_column_idx = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->descending); - isset_descending = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->nulls_first); - isset_nulls_first = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_column_idx) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_descending) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_nulls_first) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SortingColumn::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SortingColumn"); - - xfer += oprot->writeFieldBegin("column_idx", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->column_idx); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("descending", ::duckdb_apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->descending); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("nulls_first", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->nulls_first); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(SortingColumn &a, SortingColumn &b) { - using ::std::swap; - swap(a.column_idx, b.column_idx); - swap(a.descending, b.descending); - swap(a.nulls_first, b.nulls_first); -} - -SortingColumn::SortingColumn(const SortingColumn& other61) { - column_idx = other61.column_idx; - descending = other61.descending; - nulls_first = other61.nulls_first; -} -SortingColumn& SortingColumn::operator=(const SortingColumn& other62) { - column_idx = other62.column_idx; - descending = other62.descending; - nulls_first = other62.nulls_first; - return *this; -} -void SortingColumn::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "SortingColumn("; - out << "column_idx=" << to_string(column_idx); - out << ", " << "descending=" << to_string(descending); - out << ", " << "nulls_first=" << to_string(nulls_first); - out << ")"; -} - - -PageEncodingStats::~PageEncodingStats() throw() { -} - - -void PageEncodingStats::__set_page_type(const PageType::type val) { - this->page_type = val; -} - -void PageEncodingStats::__set_encoding(const Encoding::type val) { - this->encoding = val; -} - -void PageEncodingStats::__set_count(const int32_t val) { - this->count = val; -} -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t PageEncodingStats::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_page_type = false; - bool isset_encoding = false; - bool isset_count = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast63; - xfer += iprot->readI32(ecast63); - this->page_type = (PageType::type)ecast63; - isset_page_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast64; - xfer += iprot->readI32(ecast64); - this->encoding = (Encoding::type)ecast64; - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->count); - isset_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_count) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageEncodingStats::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageEncodingStats"); - - xfer += oprot->writeFieldBegin("page_type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->page_type); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32((int32_t)this->encoding); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("count", ::duckdb_apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->count); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageEncodingStats &a, PageEncodingStats &b) { - using ::std::swap; - swap(a.page_type, b.page_type); - swap(a.encoding, b.encoding); - swap(a.count, b.count); -} - -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other65) { - page_type = other65.page_type; - encoding = other65.encoding; - count = other65.count; -} -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other66) { - page_type = other66.page_type; - encoding = other66.encoding; - count = other66.count; - return *this; -} -void PageEncodingStats::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageEncodingStats("; - out << "page_type=" << to_string(page_type); - out << ", " << "encoding=" << to_string(encoding); - out << ", " << "count=" << to_string(count); - out << ")"; -} - - -ColumnMetaData::~ColumnMetaData() throw() { -} - - -void ColumnMetaData::__set_type(const Type::type val) { - this->type = val; -} - -void ColumnMetaData::__set_encodings(const duckdb::vector & val) { - this->encodings = val; -} - -void ColumnMetaData::__set_path_in_schema(const duckdb::vector & val) { - this->path_in_schema = val; -} - -void ColumnMetaData::__set_codec(const CompressionCodec::type val) { - this->codec = val; -} - -void ColumnMetaData::__set_num_values(const int64_t val) { - this->num_values = val; -} - -void ColumnMetaData::__set_total_uncompressed_size(const int64_t val) { - this->total_uncompressed_size = val; -} - -void ColumnMetaData::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -} - -void ColumnMetaData::__set_key_value_metadata(const duckdb::vector & val) { - this->key_value_metadata = val; -__isset.key_value_metadata = true; -} - -void ColumnMetaData::__set_data_page_offset(const int64_t val) { - this->data_page_offset = val; -} - -void ColumnMetaData::__set_index_page_offset(const int64_t val) { - this->index_page_offset = val; -__isset.index_page_offset = true; -} - -void ColumnMetaData::__set_dictionary_page_offset(const int64_t val) { - this->dictionary_page_offset = val; -__isset.dictionary_page_offset = true; -} - -void ColumnMetaData::__set_statistics(const Statistics& val) { - this->statistics = val; -__isset.statistics = true; -} - -void ColumnMetaData::__set_encoding_stats(const duckdb::vector & val) { - this->encoding_stats = val; -__isset.encoding_stats = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_encodings = false; - bool isset_path_in_schema = false; - bool isset_codec = false; - bool isset_num_values = false; - bool isset_total_uncompressed_size = false; - bool isset_total_compressed_size = false; - bool isset_data_page_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast67; - xfer += iprot->readI32(ecast67); - this->type = (Type::type)ecast67; - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->encodings.clear(); - uint32_t _size68; - ::duckdb_apache::thrift::protocol::TType _etype71; - xfer += iprot->readListBegin(_etype71, _size68); - this->encodings.resize(_size68); - uint32_t _i72; - for (_i72 = 0; _i72 < _size68; ++_i72) - { - int32_t ecast73; - xfer += iprot->readI32(ecast73); - this->encodings[_i72] = (Encoding::type)ecast73; - } - xfer += iprot->readListEnd(); - } - isset_encodings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size74; - ::duckdb_apache::thrift::protocol::TType _etype77; - xfer += iprot->readListBegin(_etype77, _size74); - this->path_in_schema.resize(_size74); - uint32_t _i78; - for (_i78 = 0; _i78 < _size74; ++_i78) - { - xfer += iprot->readString(this->path_in_schema[_i78]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast79; - xfer += iprot->readI32(ecast79); - this->codec = (CompressionCodec::type)ecast79; - isset_codec = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_uncompressed_size); - isset_total_uncompressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - isset_total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size80; - ::duckdb_apache::thrift::protocol::TType _etype83; - xfer += iprot->readListBegin(_etype83, _size80); - this->key_value_metadata.resize(_size80); - uint32_t _i84; - for (_i84 = 0; _i84 < _size80; ++_i84) - { - xfer += this->key_value_metadata[_i84].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->data_page_offset); - isset_data_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->index_page_offset); - this->__isset.index_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->dictionary_page_offset); - this->__isset.dictionary_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->encoding_stats.clear(); - uint32_t _size85; - ::duckdb_apache::thrift::protocol::TType _etype88; - xfer += iprot->readListBegin(_etype88, _size85); - this->encoding_stats.resize(_size85); - uint32_t _i89; - for (_i89 = 0; _i89 < _size85; ++_i89) - { - xfer += this->encoding_stats[_i89].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.encoding_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encodings) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_codec) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_uncompressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_compressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_data_page_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnMetaData"); - - xfer += oprot->writeFieldBegin("type", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32((int32_t)this->type); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encodings", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - duckdb::vector ::const_iterator _iter90; - for (_iter90 = this->encodings.begin(); _iter90 != this->encodings.end(); ++_iter90) - { - xfer += oprot->writeI32((int32_t)(*_iter90)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - duckdb::vector ::const_iterator _iter91; - for (_iter91 = this->path_in_schema.begin(); _iter91 != this->path_in_schema.end(); ++_iter91) - { - xfer += oprot->writeString((*_iter91)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("codec", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->codec); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_values", ::duckdb_apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_uncompressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_uncompressed_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 7); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - duckdb::vector ::const_iterator _iter92; - for (_iter92 = this->key_value_metadata.begin(); _iter92 != this->key_value_metadata.end(); ++_iter92) - { - xfer += (*_iter92).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("data_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 9); - xfer += oprot->writeI64(this->data_page_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.index_page_offset) { - xfer += oprot->writeFieldBegin("index_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 10); - xfer += oprot->writeI64(this->index_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_offset) { - xfer += oprot->writeFieldBegin("dictionary_page_offset", ::duckdb_apache::thrift::protocol::T_I64, 11); - xfer += oprot->writeI64(this->dictionary_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::duckdb_apache::thrift::protocol::T_STRUCT, 12); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encoding_stats) { - xfer += oprot->writeFieldBegin("encoding_stats", ::duckdb_apache::thrift::protocol::T_LIST, 13); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - duckdb::vector ::const_iterator _iter93; - for (_iter93 = this->encoding_stats.begin(); _iter93 != this->encoding_stats.end(); ++_iter93) - { - xfer += (*_iter93).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnMetaData &a, ColumnMetaData &b) { - using ::std::swap; - swap(a.type, b.type); - swap(a.encodings, b.encodings); - swap(a.path_in_schema, b.path_in_schema); - swap(a.codec, b.codec); - swap(a.num_values, b.num_values); - swap(a.total_uncompressed_size, b.total_uncompressed_size); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.data_page_offset, b.data_page_offset); - swap(a.index_page_offset, b.index_page_offset); - swap(a.dictionary_page_offset, b.dictionary_page_offset); - swap(a.statistics, b.statistics); - swap(a.encoding_stats, b.encoding_stats); - swap(a.__isset, b.__isset); -} - -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other94) { - type = other94.type; - encodings = other94.encodings; - path_in_schema = other94.path_in_schema; - codec = other94.codec; - num_values = other94.num_values; - total_uncompressed_size = other94.total_uncompressed_size; - total_compressed_size = other94.total_compressed_size; - key_value_metadata = other94.key_value_metadata; - data_page_offset = other94.data_page_offset; - index_page_offset = other94.index_page_offset; - dictionary_page_offset = other94.dictionary_page_offset; - statistics = other94.statistics; - encoding_stats = other94.encoding_stats; - __isset = other94.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other95) { - type = other95.type; - encodings = other95.encodings; - path_in_schema = other95.path_in_schema; - codec = other95.codec; - num_values = other95.num_values; - total_uncompressed_size = other95.total_uncompressed_size; - total_compressed_size = other95.total_compressed_size; - key_value_metadata = other95.key_value_metadata; - data_page_offset = other95.data_page_offset; - index_page_offset = other95.index_page_offset; - dictionary_page_offset = other95.dictionary_page_offset; - statistics = other95.statistics; - encoding_stats = other95.encoding_stats; - __isset = other95.__isset; - return *this; -} -void ColumnMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnMetaData("; - out << "type=" << to_string(type); - out << ", " << "encodings=" << to_string(encodings); - out << ", " << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "codec=" << to_string(codec); - out << ", " << "num_values=" << to_string(num_values); - out << ", " << "total_uncompressed_size=" << to_string(total_uncompressed_size); - out << ", " << "total_compressed_size=" << to_string(total_compressed_size); - out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); - out << ", " << "data_page_offset=" << to_string(data_page_offset); - out << ", " << "index_page_offset="; (__isset.index_page_offset ? (out << to_string(index_page_offset)) : (out << "")); - out << ", " << "dictionary_page_offset="; (__isset.dictionary_page_offset ? (out << to_string(dictionary_page_offset)) : (out << "")); - out << ", " << "statistics="; (__isset.statistics ? (out << to_string(statistics)) : (out << "")); - out << ", " << "encoding_stats="; (__isset.encoding_stats ? (out << to_string(encoding_stats)) : (out << "")); - out << ")"; -} - - -EncryptionWithFooterKey::~EncryptionWithFooterKey() throw() { -} - -std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EncryptionWithFooterKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionWithFooterKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { - using ::std::swap; - (void) a; - (void) b; -} - -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other96) { - (void) other96; -} -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other97) { - (void) other97; - return *this; -} -void EncryptionWithFooterKey::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionWithFooterKey("; - out << ")"; -} - - -EncryptionWithColumnKey::~EncryptionWithColumnKey() throw() { -} - - -void EncryptionWithColumnKey::__set_path_in_schema(const duckdb::vector & val) { - this->path_in_schema = val; -} - -void EncryptionWithColumnKey::__set_key_metadata(const std::string& val) { - this->key_metadata = val; -__isset.key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EncryptionWithColumnKey::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_path_in_schema = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size98; - ::duckdb_apache::thrift::protocol::TType _etype101; - xfer += iprot->readListBegin(_etype101, _size98); - this->path_in_schema.resize(_size98); - uint32_t _i102; - for (_i102 = 0; _i102 < _size98; ++_i102) - { - xfer += iprot->readString(this->path_in_schema[_i102]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t EncryptionWithColumnKey::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); - - xfer += oprot->writeFieldBegin("path_in_schema", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - duckdb::vector ::const_iterator _iter103; - for (_iter103 = this->path_in_schema.begin(); _iter103 != this->path_in_schema.end(); ++_iter103) - { - xfer += oprot->writeString((*_iter103)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { - using ::std::swap; - swap(a.path_in_schema, b.path_in_schema); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} - -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other104) { - path_in_schema = other104.path_in_schema; - key_metadata = other104.key_metadata; - __isset = other104.__isset; -} -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other105) { - path_in_schema = other105.path_in_schema; - key_metadata = other105.key_metadata; - __isset = other105.__isset; - return *this; -} -void EncryptionWithColumnKey::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionWithColumnKey("; - out << "path_in_schema=" << to_string(path_in_schema); - out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; -} - - -ColumnCryptoMetaData::~ColumnCryptoMetaData() throw() { -} - - -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) { - this->ENCRYPTION_WITH_FOOTER_KEY = val; -__isset.ENCRYPTION_WITH_FOOTER_KEY = true; -} - -void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val) { - this->ENCRYPTION_WITH_COLUMN_KEY = val; -__isset.ENCRYPTION_WITH_COLUMN_KEY = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { - using ::std::swap; - swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); - swap(a.ENCRYPTION_WITH_COLUMN_KEY, b.ENCRYPTION_WITH_COLUMN_KEY); - swap(a.__isset, b.__isset); -} - -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other106) { - ENCRYPTION_WITH_FOOTER_KEY = other106.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other106.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other106.__isset; -} -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other107) { - ENCRYPTION_WITH_FOOTER_KEY = other107.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other107.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other107.__isset; - return *this; -} -void ColumnCryptoMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnCryptoMetaData("; - out << "ENCRYPTION_WITH_FOOTER_KEY="; (__isset.ENCRYPTION_WITH_FOOTER_KEY ? (out << to_string(ENCRYPTION_WITH_FOOTER_KEY)) : (out << "")); - out << ", " << "ENCRYPTION_WITH_COLUMN_KEY="; (__isset.ENCRYPTION_WITH_COLUMN_KEY ? (out << to_string(ENCRYPTION_WITH_COLUMN_KEY)) : (out << "")); - out << ")"; -} - - -ColumnChunk::~ColumnChunk() throw() { -} - - -void ColumnChunk::__set_file_path(const std::string& val) { - this->file_path = val; -__isset.file_path = true; -} - -void ColumnChunk::__set_file_offset(const int64_t val) { - this->file_offset = val; -} - -void ColumnChunk::__set_meta_data(const ColumnMetaData& val) { - this->meta_data = val; -__isset.meta_data = true; -} - -void ColumnChunk::__set_offset_index_offset(const int64_t val) { - this->offset_index_offset = val; -__isset.offset_index_offset = true; -} - -void ColumnChunk::__set_offset_index_length(const int32_t val) { - this->offset_index_length = val; -__isset.offset_index_length = true; -} - -void ColumnChunk::__set_column_index_offset(const int64_t val) { - this->column_index_offset = val; -__isset.column_index_offset = true; -} - -void ColumnChunk::__set_column_index_length(const int32_t val) { - this->column_index_length = val; -__isset.column_index_length = true; -} - -void ColumnChunk::__set_crypto_metadata(const ColumnCryptoMetaData& val) { - this->crypto_metadata = val; -__isset.crypto_metadata = true; -} - -void ColumnChunk::__set_encrypted_column_metadata(const std::string& val) { - this->encrypted_column_metadata = val; -__isset.encrypted_column_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnChunk::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_file_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_file_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnChunk::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin("file_path", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin("meta_data", ::duckdb_apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin("offset_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin("offset_index_length", ::duckdb_apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin("column_index_offset", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin("column_index_length", ::duckdb_apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin("crypto_metadata", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnChunk &a, ColumnChunk &b) { - using ::std::swap; - swap(a.file_path, b.file_path); - swap(a.file_offset, b.file_offset); - swap(a.meta_data, b.meta_data); - swap(a.offset_index_offset, b.offset_index_offset); - swap(a.offset_index_length, b.offset_index_length); - swap(a.column_index_offset, b.column_index_offset); - swap(a.column_index_length, b.column_index_length); - swap(a.crypto_metadata, b.crypto_metadata); - swap(a.encrypted_column_metadata, b.encrypted_column_metadata); - swap(a.__isset, b.__isset); -} - -ColumnChunk::ColumnChunk(const ColumnChunk& other108) { - file_path = other108.file_path; - file_offset = other108.file_offset; - meta_data = other108.meta_data; - offset_index_offset = other108.offset_index_offset; - offset_index_length = other108.offset_index_length; - column_index_offset = other108.column_index_offset; - column_index_length = other108.column_index_length; - crypto_metadata = other108.crypto_metadata; - encrypted_column_metadata = other108.encrypted_column_metadata; - __isset = other108.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other109) { - file_path = other109.file_path; - file_offset = other109.file_offset; - meta_data = other109.meta_data; - offset_index_offset = other109.offset_index_offset; - offset_index_length = other109.offset_index_length; - column_index_offset = other109.column_index_offset; - column_index_length = other109.column_index_length; - crypto_metadata = other109.crypto_metadata; - encrypted_column_metadata = other109.encrypted_column_metadata; - __isset = other109.__isset; - return *this; -} -void ColumnChunk::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnChunk("; - out << "file_path="; (__isset.file_path ? (out << to_string(file_path)) : (out << "")); - out << ", " << "file_offset=" << to_string(file_offset); - out << ", " << "meta_data="; (__isset.meta_data ? (out << to_string(meta_data)) : (out << "")); - out << ", " << "offset_index_offset="; (__isset.offset_index_offset ? (out << to_string(offset_index_offset)) : (out << "")); - out << ", " << "offset_index_length="; (__isset.offset_index_length ? (out << to_string(offset_index_length)) : (out << "")); - out << ", " << "column_index_offset="; (__isset.column_index_offset ? (out << to_string(column_index_offset)) : (out << "")); - out << ", " << "column_index_length="; (__isset.column_index_length ? (out << to_string(column_index_length)) : (out << "")); - out << ", " << "crypto_metadata="; (__isset.crypto_metadata ? (out << to_string(crypto_metadata)) : (out << "")); - out << ", " << "encrypted_column_metadata="; (__isset.encrypted_column_metadata ? (out << to_string(encrypted_column_metadata)) : (out << "")); - out << ")"; -} - - -RowGroup::~RowGroup() throw() { -} - - -void RowGroup::__set_columns(const duckdb::vector & val) { - this->columns = val; -} - -void RowGroup::__set_total_byte_size(const int64_t val) { - this->total_byte_size = val; -} - -void RowGroup::__set_num_rows(const int64_t val) { - this->num_rows = val; -} - -void RowGroup::__set_sorting_columns(const duckdb::vector & val) { - this->sorting_columns = val; -__isset.sorting_columns = true; -} - -void RowGroup::__set_file_offset(const int64_t val) { - this->file_offset = val; -__isset.file_offset = true; -} - -void RowGroup::__set_total_compressed_size(const int64_t val) { - this->total_compressed_size = val; -__isset.total_compressed_size = true; -} - -void RowGroup::__set_ordinal(const int16_t val) { - this->ordinal = val; -__isset.ordinal = true; -} -std::ostream& operator<<(std::ostream& out, const RowGroup& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t RowGroup::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size110; - ::duckdb_apache::thrift::protocol::TType _etype113; - xfer += iprot->readListBegin(_etype113, _size110); - this->columns.resize(_size110); - uint32_t _i114; - for (_i114 = 0; _i114 < _size110; ++_i114) - { - xfer += this->columns[_i114].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_byte_size); - isset_total_byte_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->sorting_columns.clear(); - uint32_t _size115; - ::duckdb_apache::thrift::protocol::TType _etype118; - xfer += iprot->readListBegin(_etype118, _size115); - this->sorting_columns.resize(_size115); - uint32_t _i119; - for (_i119 = 0; _i119 < _size115; ++_i119) - { - xfer += this->sorting_columns[_i119].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.sorting_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - this->__isset.file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - this->__isset.total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_I16) { - xfer += iprot->readI16(this->ordinal); - this->__isset.ordinal = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_columns) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_byte_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t RowGroup::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("RowGroup"); - - xfer += oprot->writeFieldBegin("columns", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - duckdb::vector ::const_iterator _iter120; - for (_iter120 = this->columns.begin(); _iter120 != this->columns.end(); ++_iter120) - { - xfer += (*_iter120).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_byte_size", ::duckdb_apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->total_byte_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.sorting_columns) { - xfer += oprot->writeFieldBegin("sorting_columns", ::duckdb_apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - duckdb::vector ::const_iterator _iter121; - for (_iter121 = this->sorting_columns.begin(); _iter121 != this->sorting_columns.end(); ++_iter121) - { - xfer += (*_iter121).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.file_offset) { - xfer += oprot->writeFieldBegin("file_offset", ::duckdb_apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin("total_compressed_size", ::duckdb_apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { - xfer += oprot->writeFieldBegin("ordinal", ::duckdb_apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(RowGroup &a, RowGroup &b) { - using ::std::swap; - swap(a.columns, b.columns); - swap(a.total_byte_size, b.total_byte_size); - swap(a.num_rows, b.num_rows); - swap(a.sorting_columns, b.sorting_columns); - swap(a.file_offset, b.file_offset); - swap(a.total_compressed_size, b.total_compressed_size); - swap(a.ordinal, b.ordinal); - swap(a.__isset, b.__isset); -} - -RowGroup::RowGroup(const RowGroup& other122) { - columns = other122.columns; - total_byte_size = other122.total_byte_size; - num_rows = other122.num_rows; - sorting_columns = other122.sorting_columns; - file_offset = other122.file_offset; - total_compressed_size = other122.total_compressed_size; - ordinal = other122.ordinal; - __isset = other122.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other123) { - columns = other123.columns; - total_byte_size = other123.total_byte_size; - num_rows = other123.num_rows; - sorting_columns = other123.sorting_columns; - file_offset = other123.file_offset; - total_compressed_size = other123.total_compressed_size; - ordinal = other123.ordinal; - __isset = other123.__isset; - return *this; -} -void RowGroup::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "RowGroup("; - out << "columns=" << to_string(columns); - out << ", " << "total_byte_size=" << to_string(total_byte_size); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "sorting_columns="; (__isset.sorting_columns ? (out << to_string(sorting_columns)) : (out << "")); - out << ", " << "file_offset="; (__isset.file_offset ? (out << to_string(file_offset)) : (out << "")); - out << ", " << "total_compressed_size="; (__isset.total_compressed_size ? (out << to_string(total_compressed_size)) : (out << "")); - out << ", " << "ordinal="; (__isset.ordinal ? (out << to_string(ordinal)) : (out << "")); - out << ")"; -} - - -TypeDefinedOrder::~TypeDefinedOrder() throw() { -} - -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t TypeDefinedOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TypeDefinedOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { - using ::std::swap; - (void) a; - (void) b; -} - -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other124) { - (void) other124; -} -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other125) { - (void) other125; - return *this; -} -void TypeDefinedOrder::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "TypeDefinedOrder("; - out << ")"; -} - - -ColumnOrder::~ColumnOrder() throw() { -} - - -void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { - this->TYPE_ORDER = val; -__isset.TYPE_ORDER = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnOrder::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnOrder::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); - - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin("TYPE_ORDER", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnOrder &a, ColumnOrder &b) { - using ::std::swap; - swap(a.TYPE_ORDER, b.TYPE_ORDER); - swap(a.__isset, b.__isset); -} - -ColumnOrder::ColumnOrder(const ColumnOrder& other126) { - TYPE_ORDER = other126.TYPE_ORDER; - __isset = other126.__isset; -} -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other127) { - TYPE_ORDER = other127.TYPE_ORDER; - __isset = other127.__isset; - return *this; -} -void ColumnOrder::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnOrder("; - out << "TYPE_ORDER="; (__isset.TYPE_ORDER ? (out << to_string(TYPE_ORDER)) : (out << "")); - out << ")"; -} - - -PageLocation::~PageLocation() throw() { -} - - -void PageLocation::__set_offset(const int64_t val) { - this->offset = val; -} - -void PageLocation::__set_compressed_page_size(const int32_t val) { - this->compressed_page_size = val; -} - -void PageLocation::__set_first_row_index(const int64_t val) { - this->first_row_index = val; -} -std::ostream& operator<<(std::ostream& out, const PageLocation& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t PageLocation::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageLocation::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); - - xfer += oprot->writeFieldBegin("offset", ::duckdb_apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::duckdb_apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("first_row_index", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(PageLocation &a, PageLocation &b) { - using ::std::swap; - swap(a.offset, b.offset); - swap(a.compressed_page_size, b.compressed_page_size); - swap(a.first_row_index, b.first_row_index); -} - -PageLocation::PageLocation(const PageLocation& other128) { - offset = other128.offset; - compressed_page_size = other128.compressed_page_size; - first_row_index = other128.first_row_index; -} -PageLocation& PageLocation::operator=(const PageLocation& other129) { - offset = other129.offset; - compressed_page_size = other129.compressed_page_size; - first_row_index = other129.first_row_index; - return *this; -} -void PageLocation::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "PageLocation("; - out << "offset=" << to_string(offset); - out << ", " << "compressed_page_size=" << to_string(compressed_page_size); - out << ", " << "first_row_index=" << to_string(first_row_index); - out << ")"; -} - - -OffsetIndex::~OffsetIndex() throw() { -} - - -void OffsetIndex::__set_page_locations(const duckdb::vector & val) { - this->page_locations = val; -} -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t OffsetIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_page_locations = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size130; - ::duckdb_apache::thrift::protocol::TType _etype133; - xfer += iprot->readListBegin(_etype133, _size130); - this->page_locations.resize(_size130); - uint32_t _i134; - for (_i134 = 0; _i134 < _size130; ++_i134) - { - xfer += this->page_locations[_i134].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_page_locations = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_locations) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t OffsetIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("OffsetIndex"); - - xfer += oprot->writeFieldBegin("page_locations", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - duckdb::vector ::const_iterator _iter135; - for (_iter135 = this->page_locations.begin(); _iter135 != this->page_locations.end(); ++_iter135) - { - xfer += (*_iter135).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(OffsetIndex &a, OffsetIndex &b) { - using ::std::swap; - swap(a.page_locations, b.page_locations); -} - -OffsetIndex::OffsetIndex(const OffsetIndex& other136) { - page_locations = other136.page_locations; -} -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other137) { - page_locations = other137.page_locations; - return *this; -} -void OffsetIndex::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "OffsetIndex("; - out << "page_locations=" << to_string(page_locations); - out << ")"; -} - - -ColumnIndex::~ColumnIndex() throw() { -} - - -void ColumnIndex::__set_null_pages(const duckdb::vector & val) { - this->null_pages = val; -} - -void ColumnIndex::__set_min_values(const duckdb::vector & val) { - this->min_values = val; -} - -void ColumnIndex::__set_max_values(const duckdb::vector & val) { - this->max_values = val; -} - -void ColumnIndex::__set_boundary_order(const BoundaryOrder::type val) { - this->boundary_order = val; -} - -void ColumnIndex::__set_null_counts(const duckdb::vector & val) { - this->null_counts = val; -__isset.null_counts = true; -} -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t ColumnIndex::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_null_pages = false; - bool isset_min_values = false; - bool isset_max_values = false; - bool isset_boundary_order = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->null_pages.clear(); - uint32_t _size138; - ::duckdb_apache::thrift::protocol::TType _etype141; - xfer += iprot->readListBegin(_etype141, _size138); - this->null_pages.resize(_size138); - uint32_t _i142; - for (_i142 = 0; _i142 < _size138; ++_i142) - { - xfer += iprot->readBool(this->null_pages[_i142]); - } - xfer += iprot->readListEnd(); - } - isset_null_pages = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->min_values.clear(); - uint32_t _size143; - ::duckdb_apache::thrift::protocol::TType _etype146; - xfer += iprot->readListBegin(_etype146, _size143); - this->min_values.resize(_size143); - uint32_t _i147; - for (_i147 = 0; _i147 < _size143; ++_i147) - { - xfer += iprot->readBinary(this->min_values[_i147]); - } - xfer += iprot->readListEnd(); - } - isset_min_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->max_values.clear(); - uint32_t _size148; - ::duckdb_apache::thrift::protocol::TType _etype151; - xfer += iprot->readListBegin(_etype151, _size148); - this->max_values.resize(_size148); - uint32_t _i152; - for (_i152 = 0; _i152 < _size148; ++_i152) - { - xfer += iprot->readBinary(this->max_values[_i152]); - } - xfer += iprot->readListEnd(); - } - isset_max_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - int32_t ecast153; - xfer += iprot->readI32(ecast153); - this->boundary_order = (BoundaryOrder::type)ecast153; - isset_boundary_order = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->null_counts.clear(); - uint32_t _size154; - ::duckdb_apache::thrift::protocol::TType _etype157; - xfer += iprot->readListBegin(_etype157, _size154); - this->null_counts.resize(_size154); - uint32_t _i158; - for (_i158 = 0; _i158 < _size154; ++_i158) - { - xfer += iprot->readI64(this->null_counts[_i158]); - } - xfer += iprot->readListEnd(); - } - this->__isset.null_counts = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_null_pages) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_min_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_max_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_boundary_order) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnIndex::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnIndex"); - - xfer += oprot->writeFieldBegin("null_pages", ::duckdb_apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - duckdb::vector ::const_iterator _iter159; - for (_iter159 = this->null_pages.begin(); _iter159 != this->null_pages.end(); ++_iter159) - { - xfer += oprot->writeBool((*_iter159)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("min_values", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - duckdb::vector ::const_iterator _iter160; - for (_iter160 = this->min_values.begin(); _iter160 != this->min_values.end(); ++_iter160) - { - xfer += oprot->writeBinary((*_iter160)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("max_values", ::duckdb_apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - duckdb::vector ::const_iterator _iter161; - for (_iter161 = this->max_values.begin(); _iter161 != this->max_values.end(); ++_iter161) - { - xfer += oprot->writeBinary((*_iter161)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("boundary_order", ::duckdb_apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32((int32_t)this->boundary_order); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.null_counts) { - xfer += oprot->writeFieldBegin("null_counts", ::duckdb_apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - duckdb::vector ::const_iterator _iter162; - for (_iter162 = this->null_counts.begin(); _iter162 != this->null_counts.end(); ++_iter162) - { - xfer += oprot->writeI64((*_iter162)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(ColumnIndex &a, ColumnIndex &b) { - using ::std::swap; - swap(a.null_pages, b.null_pages); - swap(a.min_values, b.min_values); - swap(a.max_values, b.max_values); - swap(a.boundary_order, b.boundary_order); - swap(a.null_counts, b.null_counts); - swap(a.__isset, b.__isset); -} - -ColumnIndex::ColumnIndex(const ColumnIndex& other163) { - null_pages = other163.null_pages; - min_values = other163.min_values; - max_values = other163.max_values; - boundary_order = other163.boundary_order; - null_counts = other163.null_counts; - __isset = other163.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other164) { - null_pages = other164.null_pages; - min_values = other164.min_values; - max_values = other164.max_values; - boundary_order = other164.boundary_order; - null_counts = other164.null_counts; - __isset = other164.__isset; - return *this; -} -void ColumnIndex::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "ColumnIndex("; - out << "null_pages=" << to_string(null_pages); - out << ", " << "min_values=" << to_string(min_values); - out << ", " << "max_values=" << to_string(max_values); - out << ", " << "boundary_order=" << to_string(boundary_order); - out << ", " << "null_counts="; (__isset.null_counts ? (out << to_string(null_counts)) : (out << "")); - out << ")"; -} - - -AesGcmV1::~AesGcmV1() throw() { -} - - -void AesGcmV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; -__isset.aad_prefix = true; -} - -void AesGcmV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; -__isset.aad_file_unique = true; -} - -void AesGcmV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; -__isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t AesGcmV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(AesGcmV1 &a, AesGcmV1 &b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} - -AesGcmV1::AesGcmV1(const AesGcmV1& other165) { - aad_prefix = other165.aad_prefix; - aad_file_unique = other165.aad_file_unique; - supply_aad_prefix = other165.supply_aad_prefix; - __isset = other165.__isset; -} -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other166) { - aad_prefix = other166.aad_prefix; - aad_file_unique = other166.aad_file_unique; - supply_aad_prefix = other166.supply_aad_prefix; - __isset = other166.__isset; - return *this; -} -void AesGcmV1::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "AesGcmV1("; - out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); - out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "")); - out << ")"; -} - - -AesGcmCtrV1::~AesGcmCtrV1() throw() { -} - - -void AesGcmCtrV1::__set_aad_prefix(const std::string& val) { - this->aad_prefix = val; -__isset.aad_prefix = true; -} - -void AesGcmCtrV1::__set_aad_file_unique(const std::string& val) { - this->aad_file_unique = val; -__isset.aad_file_unique = true; -} - -void AesGcmCtrV1::__set_supply_aad_prefix(const bool val) { - this->supply_aad_prefix = val; -__isset.supply_aad_prefix = true; -} -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t AesGcmCtrV1::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmCtrV1::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::duckdb_apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::duckdb_apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { - using ::std::swap; - swap(a.aad_prefix, b.aad_prefix); - swap(a.aad_file_unique, b.aad_file_unique); - swap(a.supply_aad_prefix, b.supply_aad_prefix); - swap(a.__isset, b.__isset); -} - -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other167) { - aad_prefix = other167.aad_prefix; - aad_file_unique = other167.aad_file_unique; - supply_aad_prefix = other167.supply_aad_prefix; - __isset = other167.__isset; -} -AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other168) { - aad_prefix = other168.aad_prefix; - aad_file_unique = other168.aad_file_unique; - supply_aad_prefix = other168.supply_aad_prefix; - __isset = other168.__isset; - return *this; -} -void AesGcmCtrV1::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "AesGcmCtrV1("; - out << "aad_prefix="; (__isset.aad_prefix ? (out << to_string(aad_prefix)) : (out << "")); - out << ", " << "aad_file_unique="; (__isset.aad_file_unique ? (out << to_string(aad_file_unique)) : (out << "")); - out << ", " << "supply_aad_prefix="; (__isset.supply_aad_prefix ? (out << to_string(supply_aad_prefix)) : (out << "")); - out << ")"; -} - - -EncryptionAlgorithm::~EncryptionAlgorithm() throw() { -} - - -void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { - this->AES_GCM_V1 = val; -__isset.AES_GCM_V1 = true; -} - -void EncryptionAlgorithm::__set_AES_GCM_CTR_V1(const AesGcmCtrV1& val) { - this->AES_GCM_CTR_V1 = val; -__isset.AES_GCM_CTR_V1 = true; -} -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t EncryptionAlgorithm::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionAlgorithm::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::duckdb_apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { - using ::std::swap; - swap(a.AES_GCM_V1, b.AES_GCM_V1); - swap(a.AES_GCM_CTR_V1, b.AES_GCM_CTR_V1); - swap(a.__isset, b.__isset); -} - -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other169) { - AES_GCM_V1 = other169.AES_GCM_V1; - AES_GCM_CTR_V1 = other169.AES_GCM_CTR_V1; - __isset = other169.__isset; -} -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other170) { - AES_GCM_V1 = other170.AES_GCM_V1; - AES_GCM_CTR_V1 = other170.AES_GCM_CTR_V1; - __isset = other170.__isset; - return *this; -} -void EncryptionAlgorithm::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "EncryptionAlgorithm("; - out << "AES_GCM_V1="; (__isset.AES_GCM_V1 ? (out << to_string(AES_GCM_V1)) : (out << "")); - out << ", " << "AES_GCM_CTR_V1="; (__isset.AES_GCM_CTR_V1 ? (out << to_string(AES_GCM_CTR_V1)) : (out << "")); - out << ")"; -} - - -FileMetaData::~FileMetaData() throw() { -} - - -void FileMetaData::__set_version(const int32_t val) { - this->version = val; -} - -void FileMetaData::__set_schema(const duckdb::vector & val) { - this->schema = val; -} - -void FileMetaData::__set_num_rows(const int64_t val) { - this->num_rows = val; -} - -void FileMetaData::__set_row_groups(const duckdb::vector & val) { - this->row_groups = val; -} - -void FileMetaData::__set_key_value_metadata(const duckdb::vector & val) { - this->key_value_metadata = val; -__isset.key_value_metadata = true; -} - -void FileMetaData::__set_created_by(const std::string& val) { - this->created_by = val; -__isset.created_by = true; -} - -void FileMetaData::__set_column_orders(const duckdb::vector & val) { - this->column_orders = val; -__isset.column_orders = true; -} - -void FileMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -__isset.encryption_algorithm = true; -} - -void FileMetaData::__set_footer_signing_key_metadata(const std::string& val) { - this->footer_signing_key_metadata = val; -__isset.footer_signing_key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t FileMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size171; - ::duckdb_apache::thrift::protocol::TType _etype174; - xfer += iprot->readListBegin(_etype174, _size171); - this->schema.resize(_size171); - uint32_t _i175; - for (_i175 = 0; _i175 < _size171; ++_i175) - { - xfer += this->schema[_i175].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::duckdb_apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size176; - ::duckdb_apache::thrift::protocol::TType _etype179; - xfer += iprot->readListBegin(_etype179, _size176); - this->row_groups.resize(_size176); - uint32_t _i180; - for (_i180 = 0; _i180 < _size176; ++_i180) - { - xfer += this->row_groups[_i180].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size181; - ::duckdb_apache::thrift::protocol::TType _etype184; - xfer += iprot->readListBegin(_etype184, _size181); - this->key_value_metadata.resize(_size181); - uint32_t _i185; - for (_i185 = 0; _i185 < _size181; ++_i185) - { - xfer += this->key_value_metadata[_i185].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::duckdb_apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size186; - ::duckdb_apache::thrift::protocol::TType _etype189; - xfer += iprot->readListBegin(_etype189, _size186); - this->column_orders.resize(_size186); - uint32_t _i190; - for (_i190 = 0; _i190 < _size186; ++_i190) - { - xfer += this->column_orders[_i190].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); - - xfer += oprot->writeFieldBegin("version", ::duckdb_apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("schema", ::duckdb_apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - duckdb::vector ::const_iterator _iter191; - for (_iter191 = this->schema.begin(); _iter191 != this->schema.end(); ++_iter191) - { - xfer += (*_iter191).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::duckdb_apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("row_groups", ::duckdb_apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - duckdb::vector ::const_iterator _iter192; - for (_iter192 = this->row_groups.begin(); _iter192 != this->row_groups.end(); ++_iter192) - { - xfer += (*_iter192).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::duckdb_apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - duckdb::vector ::const_iterator _iter193; - for (_iter193 = this->key_value_metadata.begin(); _iter193 != this->key_value_metadata.end(); ++_iter193) - { - xfer += (*_iter193).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin("created_by", ::duckdb_apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin("column_orders", ::duckdb_apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::duckdb_apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - duckdb::vector ::const_iterator _iter194; - for (_iter194 = this->column_orders.begin(); _iter194 != this->column_orders.end(); ++_iter194) - { - xfer += (*_iter194).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(FileMetaData &a, FileMetaData &b) { - using ::std::swap; - swap(a.version, b.version); - swap(a.schema, b.schema); - swap(a.num_rows, b.num_rows); - swap(a.row_groups, b.row_groups); - swap(a.key_value_metadata, b.key_value_metadata); - swap(a.created_by, b.created_by); - swap(a.column_orders, b.column_orders); - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.footer_signing_key_metadata, b.footer_signing_key_metadata); - swap(a.__isset, b.__isset); -} - -FileMetaData::FileMetaData(const FileMetaData& other195) { - version = other195.version; - schema = other195.schema; - num_rows = other195.num_rows; - row_groups = other195.row_groups; - key_value_metadata = other195.key_value_metadata; - created_by = other195.created_by; - column_orders = other195.column_orders; - encryption_algorithm = other195.encryption_algorithm; - footer_signing_key_metadata = other195.footer_signing_key_metadata; - __isset = other195.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other196) { - version = other196.version; - schema = other196.schema; - num_rows = other196.num_rows; - row_groups = other196.row_groups; - key_value_metadata = other196.key_value_metadata; - created_by = other196.created_by; - column_orders = other196.column_orders; - encryption_algorithm = other196.encryption_algorithm; - footer_signing_key_metadata = other196.footer_signing_key_metadata; - __isset = other196.__isset; - return *this; -} - -void FileMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "FileMetaData("; - out << "version=" << to_string(version); - out << ", " << "schema=" << to_string(schema); - out << ", " << "num_rows=" << to_string(num_rows); - out << ", " << "row_groups=" << to_string(row_groups); - out << ", " << "key_value_metadata="; (__isset.key_value_metadata ? (out << to_string(key_value_metadata)) : (out << "")); - out << ", " << "created_by="; (__isset.created_by ? (out << to_string(created_by)) : (out << "")); - out << ", " << "column_orders="; (__isset.column_orders ? (out << to_string(column_orders)) : (out << "")); - out << ", " << "encryption_algorithm="; (__isset.encryption_algorithm ? (out << to_string(encryption_algorithm)) : (out << "")); - out << ", " << "footer_signing_key_metadata="; (__isset.footer_signing_key_metadata ? (out << to_string(footer_signing_key_metadata)) : (out << "")); - out << ")"; -} - - -FileCryptoMetaData::~FileCryptoMetaData() throw() { -} - - -void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { - this->encryption_algorithm = val; -} - -void FileCryptoMetaData::__set_key_metadata(const std::string& val) { - this->key_metadata = val; -__isset.key_metadata = true; -} -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t FileCryptoMetaData::read(::duckdb_apache::thrift::protocol::TProtocol* iprot) { - - ::duckdb_apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::duckdb_apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::duckdb_apache::thrift::protocol::TProtocolException; - - bool isset_encryption_algorithm = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::duckdb_apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::duckdb_apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileCryptoMetaData::write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::duckdb_apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); - - xfer += oprot->writeFieldBegin("encryption_algorithm", ::duckdb_apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::duckdb_apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { - using ::std::swap; - swap(a.encryption_algorithm, b.encryption_algorithm); - swap(a.key_metadata, b.key_metadata); - swap(a.__isset, b.__isset); -} - -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other197) { - encryption_algorithm = other197.encryption_algorithm; - key_metadata = other197.key_metadata; - __isset = other197.__isset; -} -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other198) { - encryption_algorithm = other198.encryption_algorithm; - key_metadata = other198.key_metadata; - __isset = other198.__isset; - return *this; -} -void FileCryptoMetaData::printTo(std::ostream& out) const { - using ::duckdb_apache::thrift::to_string; - out << "FileCryptoMetaData("; - out << "encryption_algorithm=" << to_string(encryption_algorithm); - out << ", " << "key_metadata="; (__isset.key_metadata ? (out << to_string(key_metadata)) : (out << "")); - out << ")"; -} - -}} // namespace diff --git a/src/duckdb/third_party/parquet/parquet_types.h b/src/duckdb/third_party/parquet/parquet_types.h deleted file mode 100644 index 6a9e9e171..000000000 --- a/src/duckdb/third_party/parquet/parquet_types.h +++ /dev/null @@ -1,2604 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.11.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -#ifndef parquet_TYPES_H -#define parquet_TYPES_H - -#include - -#include "duckdb/common/vector.hpp" - -#include "thrift/Thrift.h" -#include "thrift/TApplicationException.h" -#include "thrift/TBase.h" -#include "thrift/protocol/TProtocol.h" -#include "thrift/transport/TTransport.h" - -#include "thrift/stdcxx.h" - -#include "windows_compatibility.h" - -namespace duckdb_parquet { namespace format { - -struct Type { - enum type { - BOOLEAN = 0, - INT32 = 1, - INT64 = 2, - INT96 = 3, - FLOAT = 4, - DOUBLE = 5, - BYTE_ARRAY = 6, - FIXED_LEN_BYTE_ARRAY = 7 - }; -}; - -extern const std::map _Type_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Type::type& val); - -struct ConvertedType { - enum type { - UTF8 = 0, - MAP = 1, - MAP_KEY_VALUE = 2, - LIST = 3, - ENUM = 4, - DECIMAL = 5, - DATE = 6, - TIME_MILLIS = 7, - TIME_MICROS = 8, - TIMESTAMP_MILLIS = 9, - TIMESTAMP_MICROS = 10, - UINT_8 = 11, - UINT_16 = 12, - UINT_32 = 13, - UINT_64 = 14, - INT_8 = 15, - INT_16 = 16, - INT_32 = 17, - INT_64 = 18, - JSON = 19, - BSON = 20, - INTERVAL = 21 - }; -}; - -extern const std::map _ConvertedType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const ConvertedType::type& val); - -struct FieldRepetitionType { - enum type { - REQUIRED = 0, - OPTIONAL = 1, - REPEATED = 2 - }; -}; - -extern const std::map _FieldRepetitionType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val); - -struct Encoding { - enum type { - PLAIN = 0, - PLAIN_DICTIONARY = 2, - RLE = 3, - BIT_PACKED = 4, - DELTA_BINARY_PACKED = 5, - DELTA_LENGTH_BYTE_ARRAY = 6, - DELTA_BYTE_ARRAY = 7, - RLE_DICTIONARY = 8 - }; -}; - -extern const std::map _Encoding_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const Encoding::type& val); - -struct CompressionCodec { - enum type { - UNCOMPRESSED = 0, - SNAPPY = 1, - GZIP = 2, - LZO = 3, - BROTLI = 4, - LZ4 = 5, - ZSTD = 6 - }; -}; - -extern const std::map _CompressionCodec_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const CompressionCodec::type& val); - -struct PageType { - enum type { - DATA_PAGE = 0, - INDEX_PAGE = 1, - DICTIONARY_PAGE = 2, - DATA_PAGE_V2 = 3 - }; -}; - -extern const std::map _PageType_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const PageType::type& val); - -struct BoundaryOrder { - enum type { - UNORDERED = 0, - ASCENDING = 1, - DESCENDING = 2 - }; -}; - -extern const std::map _BoundaryOrder_VALUES_TO_NAMES; - -std::ostream& operator<<(std::ostream& out, const BoundaryOrder::type& val); - -class Statistics; - -class StringType; - -class UUIDType; - -class MapType; - -class ListType; - -class EnumType; - -class DateType; - -class NullType; - -class DecimalType; - -class MilliSeconds; - -class MicroSeconds; - -class NanoSeconds; - -class TimeUnit; - -class TimestampType; - -class TimeType; - -class IntType; - -class JsonType; - -class BsonType; - -class LogicalType; - -class SchemaElement; - -class DataPageHeader; - -class IndexPageHeader; - -class DictionaryPageHeader; - -class DataPageHeaderV2; - -class PageHeader; - -class KeyValue; - -class SortingColumn; - -class PageEncodingStats; - -class ColumnMetaData; - -class EncryptionWithFooterKey; - -class EncryptionWithColumnKey; - -class ColumnCryptoMetaData; - -class ColumnChunk; - -class RowGroup; - -class TypeDefinedOrder; - -class ColumnOrder; - -class PageLocation; - -class OffsetIndex; - -class ColumnIndex; - -class AesGcmV1; - -class AesGcmCtrV1; - -class EncryptionAlgorithm; - -class FileMetaData; - -class FileCryptoMetaData; - -typedef struct _Statistics__isset { - _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false) {} - bool max :1; - bool min :1; - bool null_count :1; - bool distinct_count :1; - bool max_value :1; - bool min_value :1; -} _Statistics__isset; - -class Statistics : public virtual ::duckdb_apache::thrift::TBase { - public: - - Statistics(const Statistics&); - Statistics& operator=(const Statistics&); - Statistics() : max(), min(), null_count(0), distinct_count(0), max_value(), min_value() { - } - - virtual ~Statistics() throw(); - std::string max; - std::string min; - int64_t null_count; - int64_t distinct_count; - std::string max_value; - std::string min_value; - - _Statistics__isset __isset; - - void __set_max(const std::string& val); - - void __set_min(const std::string& val); - - void __set_null_count(const int64_t val); - - void __set_distinct_count(const int64_t val); - - void __set_max_value(const std::string& val); - - void __set_min_value(const std::string& val); - - bool operator == (const Statistics & rhs) const - { - if (__isset.max != rhs.__isset.max) - return false; - else if (__isset.max && !(max == rhs.max)) - return false; - if (__isset.min != rhs.__isset.min) - return false; - else if (__isset.min && !(min == rhs.min)) - return false; - if (__isset.null_count != rhs.__isset.null_count) - return false; - else if (__isset.null_count && !(null_count == rhs.null_count)) - return false; - if (__isset.distinct_count != rhs.__isset.distinct_count) - return false; - else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) - return false; - if (__isset.max_value != rhs.__isset.max_value) - return false; - else if (__isset.max_value && !(max_value == rhs.max_value)) - return false; - if (__isset.min_value != rhs.__isset.min_value) - return false; - else if (__isset.min_value && !(min_value == rhs.min_value)) - return false; - return true; - } - bool operator != (const Statistics &rhs) const { - return !(*this == rhs); - } - - bool operator < (const Statistics & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(Statistics &a, Statistics &b); - -std::ostream& operator<<(std::ostream& out, const Statistics& obj); - - -class StringType : public virtual ::duckdb_apache::thrift::TBase { - public: - - StringType(const StringType&); - StringType& operator=(const StringType&); - StringType() { - } - - virtual ~StringType() throw(); - - bool operator == (const StringType & /* rhs */) const - { - return true; - } - bool operator != (const StringType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const StringType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(StringType &a, StringType &b); - -std::ostream& operator<<(std::ostream& out, const StringType& obj); - - -class UUIDType : public virtual ::duckdb_apache::thrift::TBase { - public: - - UUIDType(const UUIDType&); - UUIDType& operator=(const UUIDType&); - UUIDType() { - } - - virtual ~UUIDType() throw(); - - bool operator == (const UUIDType & /* rhs */) const - { - return true; - } - bool operator != (const UUIDType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const UUIDType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(UUIDType &a, UUIDType &b); - -std::ostream& operator<<(std::ostream& out, const UUIDType& obj); - - -class MapType : public virtual ::duckdb_apache::thrift::TBase { - public: - - MapType(const MapType&); - MapType& operator=(const MapType&); - MapType() { - } - - virtual ~MapType() throw(); - - bool operator == (const MapType & /* rhs */) const - { - return true; - } - bool operator != (const MapType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const MapType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MapType &a, MapType &b); - -std::ostream& operator<<(std::ostream& out, const MapType& obj); - - -class ListType : public virtual ::duckdb_apache::thrift::TBase { - public: - - ListType(const ListType&); - ListType& operator=(const ListType&); - ListType() { - } - - virtual ~ListType() throw(); - - bool operator == (const ListType & /* rhs */) const - { - return true; - } - bool operator != (const ListType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ListType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ListType &a, ListType &b); - -std::ostream& operator<<(std::ostream& out, const ListType& obj); - - -class EnumType : public virtual ::duckdb_apache::thrift::TBase { - public: - - EnumType(const EnumType&); - EnumType& operator=(const EnumType&); - EnumType() { - } - - virtual ~EnumType() throw(); - - bool operator == (const EnumType & /* rhs */) const - { - return true; - } - bool operator != (const EnumType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EnumType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EnumType &a, EnumType &b); - -std::ostream& operator<<(std::ostream& out, const EnumType& obj); - - -class DateType : public virtual ::duckdb_apache::thrift::TBase { - public: - - DateType(const DateType&); - DateType& operator=(const DateType&); - DateType() { - } - - virtual ~DateType() throw(); - - bool operator == (const DateType & /* rhs */) const - { - return true; - } - bool operator != (const DateType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DateType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DateType &a, DateType &b); - -std::ostream& operator<<(std::ostream& out, const DateType& obj); - - -class NullType : public virtual ::duckdb_apache::thrift::TBase { - public: - - NullType(const NullType&); - NullType& operator=(const NullType&); - NullType() { - } - - virtual ~NullType() throw(); - - bool operator == (const NullType & /* rhs */) const - { - return true; - } - bool operator != (const NullType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const NullType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(NullType &a, NullType &b); - -std::ostream& operator<<(std::ostream& out, const NullType& obj); - - -class DecimalType : public virtual ::duckdb_apache::thrift::TBase { - public: - - DecimalType(const DecimalType&); - DecimalType& operator=(const DecimalType&); - DecimalType() : scale(0), precision(0) { - } - - virtual ~DecimalType() throw(); - int32_t scale; - int32_t precision; - - void __set_scale(const int32_t val); - - void __set_precision(const int32_t val); - - bool operator == (const DecimalType & rhs) const - { - if (!(scale == rhs.scale)) - return false; - if (!(precision == rhs.precision)) - return false; - return true; - } - bool operator != (const DecimalType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DecimalType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DecimalType &a, DecimalType &b); - -std::ostream& operator<<(std::ostream& out, const DecimalType& obj); - - -class MilliSeconds : public virtual ::duckdb_apache::thrift::TBase { - public: - - MilliSeconds(const MilliSeconds&); - MilliSeconds& operator=(const MilliSeconds&); - MilliSeconds() { - } - - virtual ~MilliSeconds() throw(); - - bool operator == (const MilliSeconds & /* rhs */) const - { - return true; - } - bool operator != (const MilliSeconds &rhs) const { - return !(*this == rhs); - } - - bool operator < (const MilliSeconds & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MilliSeconds &a, MilliSeconds &b); - -std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); - - -class MicroSeconds : public virtual ::duckdb_apache::thrift::TBase { - public: - - MicroSeconds(const MicroSeconds&); - MicroSeconds& operator=(const MicroSeconds&); - MicroSeconds() { - } - - virtual ~MicroSeconds() throw(); - - bool operator == (const MicroSeconds & /* rhs */) const - { - return true; - } - bool operator != (const MicroSeconds &rhs) const { - return !(*this == rhs); - } - - bool operator < (const MicroSeconds & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(MicroSeconds &a, MicroSeconds &b); - -std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); - - -class NanoSeconds : public virtual ::duckdb_apache::thrift::TBase { - public: - - NanoSeconds(const NanoSeconds&); - NanoSeconds& operator=(const NanoSeconds&); - NanoSeconds() { - } - - virtual ~NanoSeconds() throw(); - - bool operator == (const NanoSeconds & /* rhs */) const - { - return true; - } - bool operator != (const NanoSeconds &rhs) const { - return !(*this == rhs); - } - - bool operator < (const NanoSeconds & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(NanoSeconds &a, NanoSeconds &b); - -std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj); - -typedef struct _TimeUnit__isset { - _TimeUnit__isset() : MILLIS(false), MICROS(false), NANOS(false) {} - bool MILLIS :1; - bool MICROS :1; - bool NANOS :1; -} _TimeUnit__isset; - -class TimeUnit : public virtual ::duckdb_apache::thrift::TBase { - public: - - TimeUnit(const TimeUnit&); - TimeUnit& operator=(const TimeUnit&); - TimeUnit() { - } - - virtual ~TimeUnit() throw(); - MilliSeconds MILLIS; - MicroSeconds MICROS; - NanoSeconds NANOS; - - _TimeUnit__isset __isset; - - void __set_MILLIS(const MilliSeconds& val); - - void __set_MICROS(const MicroSeconds& val); - - void __set_NANOS(const NanoSeconds& val); - - bool operator == (const TimeUnit & rhs) const - { - if (__isset.MILLIS != rhs.__isset.MILLIS) - return false; - else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) - return false; - if (__isset.MICROS != rhs.__isset.MICROS) - return false; - else if (__isset.MICROS && !(MICROS == rhs.MICROS)) - return false; - if (__isset.NANOS != rhs.__isset.NANOS) - return false; - else if (__isset.NANOS && !(NANOS == rhs.NANOS)) - return false; - return true; - } - bool operator != (const TimeUnit &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TimeUnit & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimeUnit &a, TimeUnit &b); - -std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); - - -class TimestampType : public virtual ::duckdb_apache::thrift::TBase { - public: - - TimestampType(const TimestampType&); - TimestampType& operator=(const TimestampType&); - TimestampType() : isAdjustedToUTC(0) { - } - - virtual ~TimestampType() throw(); - bool isAdjustedToUTC; - TimeUnit unit; - - void __set_isAdjustedToUTC(const bool val); - - void __set_unit(const TimeUnit& val); - - bool operator == (const TimestampType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } - bool operator != (const TimestampType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TimestampType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimestampType &a, TimestampType &b); - -std::ostream& operator<<(std::ostream& out, const TimestampType& obj); - - -class TimeType : public virtual ::duckdb_apache::thrift::TBase { - public: - - TimeType(const TimeType&); - TimeType& operator=(const TimeType&); - TimeType() : isAdjustedToUTC(0) { - } - - virtual ~TimeType() throw(); - bool isAdjustedToUTC; - TimeUnit unit; - - void __set_isAdjustedToUTC(const bool val); - - void __set_unit(const TimeUnit& val); - - bool operator == (const TimeType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } - bool operator != (const TimeType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TimeType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TimeType &a, TimeType &b); - -std::ostream& operator<<(std::ostream& out, const TimeType& obj); - - -class IntType : public virtual ::duckdb_apache::thrift::TBase { - public: - - IntType(const IntType&); - IntType& operator=(const IntType&); - IntType() : bitWidth(0), isSigned(0) { - } - - virtual ~IntType() throw(); - int8_t bitWidth; - bool isSigned; - - void __set_bitWidth(const int8_t val); - - void __set_isSigned(const bool val); - - bool operator == (const IntType & rhs) const - { - if (!(bitWidth == rhs.bitWidth)) - return false; - if (!(isSigned == rhs.isSigned)) - return false; - return true; - } - bool operator != (const IntType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const IntType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(IntType &a, IntType &b); - -std::ostream& operator<<(std::ostream& out, const IntType& obj); - - -class JsonType : public virtual ::duckdb_apache::thrift::TBase { - public: - - JsonType(const JsonType&); - JsonType& operator=(const JsonType&); - JsonType() { - } - - virtual ~JsonType() throw(); - - bool operator == (const JsonType & /* rhs */) const - { - return true; - } - bool operator != (const JsonType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const JsonType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(JsonType &a, JsonType &b); - -std::ostream& operator<<(std::ostream& out, const JsonType& obj); - - -class BsonType : public virtual ::duckdb_apache::thrift::TBase { - public: - - BsonType(const BsonType&); - BsonType& operator=(const BsonType&); - BsonType() { - } - - virtual ~BsonType() throw(); - - bool operator == (const BsonType & /* rhs */) const - { - return true; - } - bool operator != (const BsonType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const BsonType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(BsonType &a, BsonType &b); - -std::ostream& operator<<(std::ostream& out, const BsonType& obj); - -typedef struct _LogicalType__isset { - _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false) {} - bool STRING :1; - bool MAP :1; - bool LIST :1; - bool ENUM :1; - bool DECIMAL :1; - bool DATE :1; - bool TIME :1; - bool TIMESTAMP :1; - bool INTEGER :1; - bool UNKNOWN :1; - bool JSON :1; - bool BSON :1; - bool UUID :1; -} _LogicalType__isset; - -class LogicalType : public virtual ::duckdb_apache::thrift::TBase { - public: - - LogicalType(const LogicalType&); - LogicalType& operator=(const LogicalType&); - LogicalType() { - } - - virtual ~LogicalType() throw(); - StringType STRING; - MapType MAP; - ListType LIST; - EnumType ENUM; - DecimalType DECIMAL; - DateType DATE; - TimeType TIME; - TimestampType TIMESTAMP; - IntType INTEGER; - NullType UNKNOWN; - JsonType JSON; - BsonType BSON; - UUIDType UUID; - - _LogicalType__isset __isset; - - void __set_STRING(const StringType& val); - - void __set_MAP(const MapType& val); - - void __set_LIST(const ListType& val); - - void __set_ENUM(const EnumType& val); - - void __set_DECIMAL(const DecimalType& val); - - void __set_DATE(const DateType& val); - - void __set_TIME(const TimeType& val); - - void __set_TIMESTAMP(const TimestampType& val); - - void __set_INTEGER(const IntType& val); - - void __set_UNKNOWN(const NullType& val); - - void __set_JSON(const JsonType& val); - - void __set_BSON(const BsonType& val); - - void __set_UUID(const UUIDType& val); - - bool operator == (const LogicalType & rhs) const - { - if (__isset.STRING != rhs.__isset.STRING) - return false; - else if (__isset.STRING && !(STRING == rhs.STRING)) - return false; - if (__isset.MAP != rhs.__isset.MAP) - return false; - else if (__isset.MAP && !(MAP == rhs.MAP)) - return false; - if (__isset.LIST != rhs.__isset.LIST) - return false; - else if (__isset.LIST && !(LIST == rhs.LIST)) - return false; - if (__isset.ENUM != rhs.__isset.ENUM) - return false; - else if (__isset.ENUM && !(ENUM == rhs.ENUM)) - return false; - if (__isset.DECIMAL != rhs.__isset.DECIMAL) - return false; - else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL)) - return false; - if (__isset.DATE != rhs.__isset.DATE) - return false; - else if (__isset.DATE && !(DATE == rhs.DATE)) - return false; - if (__isset.TIME != rhs.__isset.TIME) - return false; - else if (__isset.TIME && !(TIME == rhs.TIME)) - return false; - if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) - return false; - else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) - return false; - if (__isset.INTEGER != rhs.__isset.INTEGER) - return false; - else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) - return false; - if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) - return false; - else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) - return false; - if (__isset.JSON != rhs.__isset.JSON) - return false; - else if (__isset.JSON && !(JSON == rhs.JSON)) - return false; - if (__isset.BSON != rhs.__isset.BSON) - return false; - else if (__isset.BSON && !(BSON == rhs.BSON)) - return false; - if (__isset.UUID != rhs.__isset.UUID) - return false; - else if (__isset.UUID && !(UUID == rhs.UUID)) - return false; - return true; - } - bool operator != (const LogicalType &rhs) const { - return !(*this == rhs); - } - - bool operator < (const LogicalType & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(LogicalType &a, LogicalType &b); - -std::ostream& operator<<(std::ostream& out, const LogicalType& obj); - -typedef struct _SchemaElement__isset { - _SchemaElement__isset() : type(false), type_length(false), repetition_type(false), num_children(false), converted_type(false), scale(false), precision(false), field_id(false), logicalType(false) {} - bool type :1; - bool type_length :1; - bool repetition_type :1; - bool num_children :1; - bool converted_type :1; - bool scale :1; - bool precision :1; - bool field_id :1; - bool logicalType :1; -} _SchemaElement__isset; - -class SchemaElement : public virtual ::duckdb_apache::thrift::TBase { - public: - - SchemaElement(const SchemaElement&); - SchemaElement& operator=(const SchemaElement&); - SchemaElement() : type((Type::type)0), type_length(0), repetition_type((FieldRepetitionType::type)0), name(), num_children(0), converted_type((ConvertedType::type)0), scale(0), precision(0), field_id(0) { - } - - virtual ~SchemaElement() throw(); - Type::type type; - int32_t type_length; - FieldRepetitionType::type repetition_type; - std::string name; - int32_t num_children; - ConvertedType::type converted_type; - int32_t scale; - int32_t precision; - int32_t field_id; - LogicalType logicalType; - - _SchemaElement__isset __isset; - - void __set_type(const Type::type val); - - void __set_type_length(const int32_t val); - - void __set_repetition_type(const FieldRepetitionType::type val); - - void __set_name(const std::string& val); - - void __set_num_children(const int32_t val); - - void __set_converted_type(const ConvertedType::type val); - - void __set_scale(const int32_t val); - - void __set_precision(const int32_t val); - - void __set_field_id(const int32_t val); - - void __set_logicalType(const LogicalType& val); - - bool operator == (const SchemaElement & rhs) const - { - if (__isset.type != rhs.__isset.type) - return false; - else if (__isset.type && !(type == rhs.type)) - return false; - if (__isset.type_length != rhs.__isset.type_length) - return false; - else if (__isset.type_length && !(type_length == rhs.type_length)) - return false; - if (__isset.repetition_type != rhs.__isset.repetition_type) - return false; - else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) - return false; - if (!(name == rhs.name)) - return false; - if (__isset.num_children != rhs.__isset.num_children) - return false; - else if (__isset.num_children && !(num_children == rhs.num_children)) - return false; - if (__isset.converted_type != rhs.__isset.converted_type) - return false; - else if (__isset.converted_type && !(converted_type == rhs.converted_type)) - return false; - if (__isset.scale != rhs.__isset.scale) - return false; - else if (__isset.scale && !(scale == rhs.scale)) - return false; - if (__isset.precision != rhs.__isset.precision) - return false; - else if (__isset.precision && !(precision == rhs.precision)) - return false; - if (__isset.field_id != rhs.__isset.field_id) - return false; - else if (__isset.field_id && !(field_id == rhs.field_id)) - return false; - if (__isset.logicalType != rhs.__isset.logicalType) - return false; - else if (__isset.logicalType && !(logicalType == rhs.logicalType)) - return false; - return true; - } - bool operator != (const SchemaElement &rhs) const { - return !(*this == rhs); - } - - bool operator < (const SchemaElement & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SchemaElement &a, SchemaElement &b); - -std::ostream& operator<<(std::ostream& out, const SchemaElement& obj); - -typedef struct _DataPageHeader__isset { - _DataPageHeader__isset() : statistics(false) {} - bool statistics :1; -} _DataPageHeader__isset; - -class DataPageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - DataPageHeader(const DataPageHeader&); - DataPageHeader& operator=(const DataPageHeader&); - DataPageHeader() : num_values(0), encoding((Encoding::type)0), definition_level_encoding((Encoding::type)0), repetition_level_encoding((Encoding::type)0) { - } - - virtual ~DataPageHeader() throw(); - int32_t num_values; - Encoding::type encoding; - Encoding::type definition_level_encoding; - Encoding::type repetition_level_encoding; - Statistics statistics; - - _DataPageHeader__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_definition_level_encoding(const Encoding::type val); - - void __set_repetition_level_encoding(const Encoding::type val); - - void __set_statistics(const Statistics& val); - - bool operator == (const DataPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_level_encoding == rhs.definition_level_encoding)) - return false; - if (!(repetition_level_encoding == rhs.repetition_level_encoding)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const DataPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DataPageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DataPageHeader &a, DataPageHeader &b); - -std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); - - -class IndexPageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - IndexPageHeader(const IndexPageHeader&); - IndexPageHeader& operator=(const IndexPageHeader&); - IndexPageHeader() { - } - - virtual ~IndexPageHeader() throw(); - - bool operator == (const IndexPageHeader & /* rhs */) const - { - return true; - } - bool operator != (const IndexPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const IndexPageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(IndexPageHeader &a, IndexPageHeader &b); - -std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj); - -typedef struct _DictionaryPageHeader__isset { - _DictionaryPageHeader__isset() : is_sorted(false) {} - bool is_sorted :1; -} _DictionaryPageHeader__isset; - -class DictionaryPageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - DictionaryPageHeader(const DictionaryPageHeader&); - DictionaryPageHeader& operator=(const DictionaryPageHeader&); - DictionaryPageHeader() : num_values(0), encoding((Encoding::type)0), is_sorted(0) { - } - - virtual ~DictionaryPageHeader() throw(); - int32_t num_values; - Encoding::type encoding; - bool is_sorted; - - _DictionaryPageHeader__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_is_sorted(const bool val); - - bool operator == (const DictionaryPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (__isset.is_sorted != rhs.__isset.is_sorted) - return false; - else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) - return false; - return true; - } - bool operator != (const DictionaryPageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DictionaryPageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DictionaryPageHeader &a, DictionaryPageHeader &b); - -std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj); - -typedef struct _DataPageHeaderV2__isset { - _DataPageHeaderV2__isset() : is_compressed(true), statistics(false) {} - bool is_compressed :1; - bool statistics :1; -} _DataPageHeaderV2__isset; - -class DataPageHeaderV2 : public virtual ::duckdb_apache::thrift::TBase { - public: - - DataPageHeaderV2(const DataPageHeaderV2&); - DataPageHeaderV2& operator=(const DataPageHeaderV2&); - DataPageHeaderV2() : num_values(0), num_nulls(0), num_rows(0), encoding((Encoding::type)0), definition_levels_byte_length(0), repetition_levels_byte_length(0), is_compressed(true) { - } - - virtual ~DataPageHeaderV2() throw(); - int32_t num_values; - int32_t num_nulls; - int32_t num_rows; - Encoding::type encoding; - int32_t definition_levels_byte_length; - int32_t repetition_levels_byte_length; - bool is_compressed; - Statistics statistics; - - _DataPageHeaderV2__isset __isset; - - void __set_num_values(const int32_t val); - - void __set_num_nulls(const int32_t val); - - void __set_num_rows(const int32_t val); - - void __set_encoding(const Encoding::type val); - - void __set_definition_levels_byte_length(const int32_t val); - - void __set_repetition_levels_byte_length(const int32_t val); - - void __set_is_compressed(const bool val); - - void __set_statistics(const Statistics& val); - - bool operator == (const DataPageHeaderV2 & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(num_nulls == rhs.num_nulls)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) - return false; - if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) - return false; - if (__isset.is_compressed != rhs.__isset.is_compressed) - return false; - else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } - bool operator != (const DataPageHeaderV2 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const DataPageHeaderV2 & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b); - -std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); - -typedef struct _PageHeader__isset { - _PageHeader__isset() : crc(false), data_page_header(false), index_page_header(false), dictionary_page_header(false), data_page_header_v2(false) {} - bool crc :1; - bool data_page_header :1; - bool index_page_header :1; - bool dictionary_page_header :1; - bool data_page_header_v2 :1; -} _PageHeader__isset; - -class PageHeader : public virtual ::duckdb_apache::thrift::TBase { - public: - - PageHeader(const PageHeader&); - PageHeader& operator=(const PageHeader&); - PageHeader() : type((PageType::type)0), uncompressed_page_size(0), compressed_page_size(0), crc(0) { - } - - virtual ~PageHeader() throw(); - PageType::type type; - int32_t uncompressed_page_size; - int32_t compressed_page_size; - int32_t crc; - DataPageHeader data_page_header; - IndexPageHeader index_page_header; - DictionaryPageHeader dictionary_page_header; - DataPageHeaderV2 data_page_header_v2; - - _PageHeader__isset __isset; - - void __set_type(const PageType::type val); - - void __set_uncompressed_page_size(const int32_t val); - - void __set_compressed_page_size(const int32_t val); - - void __set_crc(const int32_t val); - - void __set_data_page_header(const DataPageHeader& val); - - void __set_index_page_header(const IndexPageHeader& val); - - void __set_dictionary_page_header(const DictionaryPageHeader& val); - - void __set_data_page_header_v2(const DataPageHeaderV2& val); - - bool operator == (const PageHeader & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(uncompressed_page_size == rhs.uncompressed_page_size)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (__isset.crc != rhs.__isset.crc) - return false; - else if (__isset.crc && !(crc == rhs.crc)) - return false; - if (__isset.data_page_header != rhs.__isset.data_page_header) - return false; - else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) - return false; - if (__isset.index_page_header != rhs.__isset.index_page_header) - return false; - else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) - return false; - if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) - return false; - else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) - return false; - if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) - return false; - else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) - return false; - return true; - } - bool operator != (const PageHeader &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageHeader & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageHeader &a, PageHeader &b); - -std::ostream& operator<<(std::ostream& out, const PageHeader& obj); - -typedef struct _KeyValue__isset { - _KeyValue__isset() : value(false) {} - bool value :1; -} _KeyValue__isset; - -class KeyValue : public virtual ::duckdb_apache::thrift::TBase { - public: - - KeyValue(const KeyValue&); - KeyValue& operator=(const KeyValue&); - KeyValue() : key(), value() { - } - - virtual ~KeyValue() throw(); - std::string key; - std::string value; - - _KeyValue__isset __isset; - - void __set_key(const std::string& val); - - void __set_value(const std::string& val); - - bool operator == (const KeyValue & rhs) const - { - if (!(key == rhs.key)) - return false; - if (__isset.value != rhs.__isset.value) - return false; - else if (__isset.value && !(value == rhs.value)) - return false; - return true; - } - bool operator != (const KeyValue &rhs) const { - return !(*this == rhs); - } - - bool operator < (const KeyValue & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(KeyValue &a, KeyValue &b); - -std::ostream& operator<<(std::ostream& out, const KeyValue& obj); - - -class SortingColumn : public virtual ::duckdb_apache::thrift::TBase { - public: - - SortingColumn(const SortingColumn&); - SortingColumn& operator=(const SortingColumn&); - SortingColumn() : column_idx(0), descending(0), nulls_first(0) { - } - - virtual ~SortingColumn() throw(); - int32_t column_idx; - bool descending; - bool nulls_first; - - void __set_column_idx(const int32_t val); - - void __set_descending(const bool val); - - void __set_nulls_first(const bool val); - - bool operator == (const SortingColumn & rhs) const - { - if (!(column_idx == rhs.column_idx)) - return false; - if (!(descending == rhs.descending)) - return false; - if (!(nulls_first == rhs.nulls_first)) - return false; - return true; - } - bool operator != (const SortingColumn &rhs) const { - return !(*this == rhs); - } - - bool operator < (const SortingColumn & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(SortingColumn &a, SortingColumn &b); - -std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); - - -class PageEncodingStats : public virtual ::duckdb_apache::thrift::TBase { - public: - - PageEncodingStats(const PageEncodingStats&); - PageEncodingStats& operator=(const PageEncodingStats&); - PageEncodingStats() : page_type((PageType::type)0), encoding((Encoding::type)0), count(0) { - } - - virtual ~PageEncodingStats() throw(); - PageType::type page_type; - Encoding::type encoding; - int32_t count; - - void __set_page_type(const PageType::type val); - - void __set_encoding(const Encoding::type val); - - void __set_count(const int32_t val); - - bool operator == (const PageEncodingStats & rhs) const - { - if (!(page_type == rhs.page_type)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(count == rhs.count)) - return false; - return true; - } - bool operator != (const PageEncodingStats &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageEncodingStats & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageEncodingStats &a, PageEncodingStats &b); - -std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); - -typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false) {} - bool key_value_metadata :1; - bool index_page_offset :1; - bool dictionary_page_offset :1; - bool statistics :1; - bool encoding_stats :1; -} _ColumnMetaData__isset; - -class ColumnMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnMetaData(const ColumnMetaData&); - ColumnMetaData& operator=(const ColumnMetaData&); - ColumnMetaData() : type((Type::type)0), codec((CompressionCodec::type)0), num_values(0), total_uncompressed_size(0), total_compressed_size(0), data_page_offset(0), index_page_offset(0), dictionary_page_offset(0) { - } - - virtual ~ColumnMetaData() throw(); - Type::type type; - duckdb::vector encodings; - duckdb::vector path_in_schema; - CompressionCodec::type codec; - int64_t num_values; - int64_t total_uncompressed_size; - int64_t total_compressed_size; - duckdb::vector key_value_metadata; - int64_t data_page_offset; - int64_t index_page_offset; - int64_t dictionary_page_offset; - Statistics statistics; - duckdb::vector encoding_stats; - - _ColumnMetaData__isset __isset; - - void __set_type(const Type::type val); - - void __set_encodings(const duckdb::vector & val); - - void __set_path_in_schema(const duckdb::vector & val); - - void __set_codec(const CompressionCodec::type val); - - void __set_num_values(const int64_t val); - - void __set_total_uncompressed_size(const int64_t val); - - void __set_total_compressed_size(const int64_t val); - - void __set_key_value_metadata(const duckdb::vector & val); - - void __set_data_page_offset(const int64_t val); - - void __set_index_page_offset(const int64_t val); - - void __set_dictionary_page_offset(const int64_t val); - - void __set_statistics(const Statistics& val); - - void __set_encoding_stats(const duckdb::vector & val); - - bool operator == (const ColumnMetaData & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(encodings == rhs.encodings)) - return false; - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (!(codec == rhs.codec)) - return false; - if (!(num_values == rhs.num_values)) - return false; - if (!(total_uncompressed_size == rhs.total_uncompressed_size)) - return false; - if (!(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (!(data_page_offset == rhs.data_page_offset)) - return false; - if (__isset.index_page_offset != rhs.__isset.index_page_offset) - return false; - else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) - return false; - if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) - return false; - else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - if (__isset.encoding_stats != rhs.__isset.encoding_stats) - return false; - else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) - return false; - return true; - } - bool operator != (const ColumnMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnMetaData &a, ColumnMetaData &b); - -std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); - - -class EncryptionWithFooterKey : public virtual ::duckdb_apache::thrift::TBase { - public: - - EncryptionWithFooterKey(const EncryptionWithFooterKey&); - EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&); - EncryptionWithFooterKey() { - } - - virtual ~EncryptionWithFooterKey() throw(); - - bool operator == (const EncryptionWithFooterKey & /* rhs */) const - { - return true; - } - bool operator != (const EncryptionWithFooterKey &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EncryptionWithFooterKey & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b); - -std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj); - -typedef struct _EncryptionWithColumnKey__isset { - _EncryptionWithColumnKey__isset() : key_metadata(false) {} - bool key_metadata :1; -} _EncryptionWithColumnKey__isset; - -class EncryptionWithColumnKey : public virtual ::duckdb_apache::thrift::TBase { - public: - - EncryptionWithColumnKey(const EncryptionWithColumnKey&); - EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&); - EncryptionWithColumnKey() : key_metadata() { - } - - virtual ~EncryptionWithColumnKey() throw(); - duckdb::vector path_in_schema; - std::string key_metadata; - - _EncryptionWithColumnKey__isset __isset; - - void __set_path_in_schema(const duckdb::vector & val); - - void __set_key_metadata(const std::string& val); - - bool operator == (const EncryptionWithColumnKey & rhs) const - { - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } - bool operator != (const EncryptionWithColumnKey &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EncryptionWithColumnKey & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b); - -std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj); - -typedef struct _ColumnCryptoMetaData__isset { - _ColumnCryptoMetaData__isset() : ENCRYPTION_WITH_FOOTER_KEY(false), ENCRYPTION_WITH_COLUMN_KEY(false) {} - bool ENCRYPTION_WITH_FOOTER_KEY :1; - bool ENCRYPTION_WITH_COLUMN_KEY :1; -} _ColumnCryptoMetaData__isset; - -class ColumnCryptoMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnCryptoMetaData(const ColumnCryptoMetaData&); - ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&); - ColumnCryptoMetaData() { - } - - virtual ~ColumnCryptoMetaData() throw(); - EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY; - EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY; - - _ColumnCryptoMetaData__isset __isset; - - void __set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val); - - void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val); - - bool operator == (const ColumnCryptoMetaData & rhs) const - { - if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) - return false; - if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) - return false; - return true; - } - bool operator != (const ColumnCryptoMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnCryptoMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b); - -std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj); - -typedef struct _ColumnChunk__isset { - _ColumnChunk__isset() : file_path(false), meta_data(false), offset_index_offset(false), offset_index_length(false), column_index_offset(false), column_index_length(false), crypto_metadata(false), encrypted_column_metadata(false) {} - bool file_path :1; - bool meta_data :1; - bool offset_index_offset :1; - bool offset_index_length :1; - bool column_index_offset :1; - bool column_index_length :1; - bool crypto_metadata :1; - bool encrypted_column_metadata :1; -} _ColumnChunk__isset; - -class ColumnChunk : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnChunk(const ColumnChunk&); - ColumnChunk& operator=(const ColumnChunk&); - ColumnChunk() : file_path(), file_offset(0), offset_index_offset(0), offset_index_length(0), column_index_offset(0), column_index_length(0), encrypted_column_metadata() { - } - - virtual ~ColumnChunk() throw(); - std::string file_path; - int64_t file_offset; - ColumnMetaData meta_data; - int64_t offset_index_offset; - int32_t offset_index_length; - int64_t column_index_offset; - int32_t column_index_length; - ColumnCryptoMetaData crypto_metadata; - std::string encrypted_column_metadata; - - _ColumnChunk__isset __isset; - - void __set_file_path(const std::string& val); - - void __set_file_offset(const int64_t val); - - void __set_meta_data(const ColumnMetaData& val); - - void __set_offset_index_offset(const int64_t val); - - void __set_offset_index_length(const int32_t val); - - void __set_column_index_offset(const int64_t val); - - void __set_column_index_length(const int32_t val); - - void __set_crypto_metadata(const ColumnCryptoMetaData& val); - - void __set_encrypted_column_metadata(const std::string& val); - - bool operator == (const ColumnChunk & rhs) const - { - if (__isset.file_path != rhs.__isset.file_path) - return false; - else if (__isset.file_path && !(file_path == rhs.file_path)) - return false; - if (!(file_offset == rhs.file_offset)) - return false; - if (__isset.meta_data != rhs.__isset.meta_data) - return false; - else if (__isset.meta_data && !(meta_data == rhs.meta_data)) - return false; - if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) - return false; - else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset)) - return false; - if (__isset.offset_index_length != rhs.__isset.offset_index_length) - return false; - else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length)) - return false; - if (__isset.column_index_offset != rhs.__isset.column_index_offset) - return false; - else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset)) - return false; - if (__isset.column_index_length != rhs.__isset.column_index_length) - return false; - else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length)) - return false; - if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) - return false; - else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) - return false; - if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata) - return false; - else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata)) - return false; - return true; - } - bool operator != (const ColumnChunk &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnChunk & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnChunk &a, ColumnChunk &b); - -std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj); - -typedef struct _RowGroup__isset { - _RowGroup__isset() : sorting_columns(false), file_offset(false), total_compressed_size(false), ordinal(false) {} - bool sorting_columns :1; - bool file_offset :1; - bool total_compressed_size :1; - bool ordinal :1; -} _RowGroup__isset; - -class RowGroup : public virtual ::duckdb_apache::thrift::TBase { - public: - - RowGroup(const RowGroup&); - RowGroup& operator=(const RowGroup&); - RowGroup() : total_byte_size(0), num_rows(0), file_offset(0), total_compressed_size(0), ordinal(0) { - } - - virtual ~RowGroup() throw(); - duckdb::vector columns; - int64_t total_byte_size; - int64_t num_rows; - duckdb::vector sorting_columns; - int64_t file_offset; - int64_t total_compressed_size; - int16_t ordinal; - - _RowGroup__isset __isset; - - void __set_columns(const duckdb::vector & val); - - void __set_total_byte_size(const int64_t val); - - void __set_num_rows(const int64_t val); - - void __set_sorting_columns(const duckdb::vector & val); - - void __set_file_offset(const int64_t val); - - void __set_total_compressed_size(const int64_t val); - - void __set_ordinal(const int16_t val); - - bool operator == (const RowGroup & rhs) const - { - if (!(columns == rhs.columns)) - return false; - if (!(total_byte_size == rhs.total_byte_size)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (__isset.sorting_columns != rhs.__isset.sorting_columns) - return false; - else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) - return false; - if (__isset.file_offset != rhs.__isset.file_offset) - return false; - else if (__isset.file_offset && !(file_offset == rhs.file_offset)) - return false; - if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) - return false; - else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.ordinal != rhs.__isset.ordinal) - return false; - else if (__isset.ordinal && !(ordinal == rhs.ordinal)) - return false; - return true; - } - bool operator != (const RowGroup &rhs) const { - return !(*this == rhs); - } - - bool operator < (const RowGroup & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(RowGroup &a, RowGroup &b); - -std::ostream& operator<<(std::ostream& out, const RowGroup& obj); - - -class TypeDefinedOrder : public virtual ::duckdb_apache::thrift::TBase { - public: - - TypeDefinedOrder(const TypeDefinedOrder&); - TypeDefinedOrder& operator=(const TypeDefinedOrder&); - TypeDefinedOrder() { - } - - virtual ~TypeDefinedOrder() throw(); - - bool operator == (const TypeDefinedOrder & /* rhs */) const - { - return true; - } - bool operator != (const TypeDefinedOrder &rhs) const { - return !(*this == rhs); - } - - bool operator < (const TypeDefinedOrder & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(TypeDefinedOrder &a, TypeDefinedOrder &b); - -std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj); - -typedef struct _ColumnOrder__isset { - _ColumnOrder__isset() : TYPE_ORDER(false) {} - bool TYPE_ORDER :1; -} _ColumnOrder__isset; - -class ColumnOrder : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnOrder(const ColumnOrder&); - ColumnOrder& operator=(const ColumnOrder&); - ColumnOrder() { - } - - virtual ~ColumnOrder() throw(); - TypeDefinedOrder TYPE_ORDER; - - _ColumnOrder__isset __isset; - - void __set_TYPE_ORDER(const TypeDefinedOrder& val); - - bool operator == (const ColumnOrder & rhs) const - { - if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) - return false; - else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) - return false; - return true; - } - bool operator != (const ColumnOrder &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnOrder & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnOrder &a, ColumnOrder &b); - -std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); - - -class PageLocation : public virtual ::duckdb_apache::thrift::TBase { - public: - - PageLocation(const PageLocation&); - PageLocation& operator=(const PageLocation&); - PageLocation() : offset(0), compressed_page_size(0), first_row_index(0) { - } - - virtual ~PageLocation() throw(); - int64_t offset; - int32_t compressed_page_size; - int64_t first_row_index; - - void __set_offset(const int64_t val); - - void __set_compressed_page_size(const int32_t val); - - void __set_first_row_index(const int64_t val); - - bool operator == (const PageLocation & rhs) const - { - if (!(offset == rhs.offset)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (!(first_row_index == rhs.first_row_index)) - return false; - return true; - } - bool operator != (const PageLocation &rhs) const { - return !(*this == rhs); - } - - bool operator < (const PageLocation & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(PageLocation &a, PageLocation &b); - -std::ostream& operator<<(std::ostream& out, const PageLocation& obj); - - -class OffsetIndex : public virtual ::duckdb_apache::thrift::TBase { - public: - - OffsetIndex(const OffsetIndex&); - OffsetIndex& operator=(const OffsetIndex&); - OffsetIndex() { - } - - virtual ~OffsetIndex() throw(); - duckdb::vector page_locations; - - void __set_page_locations(const duckdb::vector & val); - - bool operator == (const OffsetIndex & rhs) const - { - if (!(page_locations == rhs.page_locations)) - return false; - return true; - } - bool operator != (const OffsetIndex &rhs) const { - return !(*this == rhs); - } - - bool operator < (const OffsetIndex & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(OffsetIndex &a, OffsetIndex &b); - -std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj); - -typedef struct _ColumnIndex__isset { - _ColumnIndex__isset() : null_counts(false) {} - bool null_counts :1; -} _ColumnIndex__isset; - -class ColumnIndex : public virtual ::duckdb_apache::thrift::TBase { - public: - - ColumnIndex(const ColumnIndex&); - ColumnIndex& operator=(const ColumnIndex&); - ColumnIndex() : boundary_order((BoundaryOrder::type)0) { - } - - virtual ~ColumnIndex() throw(); - duckdb::vector null_pages; - duckdb::vector min_values; - duckdb::vector max_values; - BoundaryOrder::type boundary_order; - duckdb::vector null_counts; - - _ColumnIndex__isset __isset; - - void __set_null_pages(const duckdb::vector & val); - - void __set_min_values(const duckdb::vector & val); - - void __set_max_values(const duckdb::vector & val); - - void __set_boundary_order(const BoundaryOrder::type val); - - void __set_null_counts(const duckdb::vector & val); - - bool operator == (const ColumnIndex & rhs) const - { - if (!(null_pages == rhs.null_pages)) - return false; - if (!(min_values == rhs.min_values)) - return false; - if (!(max_values == rhs.max_values)) - return false; - if (!(boundary_order == rhs.boundary_order)) - return false; - if (__isset.null_counts != rhs.__isset.null_counts) - return false; - else if (__isset.null_counts && !(null_counts == rhs.null_counts)) - return false; - return true; - } - bool operator != (const ColumnIndex &rhs) const { - return !(*this == rhs); - } - - bool operator < (const ColumnIndex & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(ColumnIndex &a, ColumnIndex &b); - -std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj); - -typedef struct _AesGcmV1__isset { - _AesGcmV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {} - bool aad_prefix :1; - bool aad_file_unique :1; - bool supply_aad_prefix :1; -} _AesGcmV1__isset; - -class AesGcmV1 : public virtual ::duckdb_apache::thrift::TBase { - public: - - AesGcmV1(const AesGcmV1&); - AesGcmV1& operator=(const AesGcmV1&); - AesGcmV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) { - } - - virtual ~AesGcmV1() throw(); - std::string aad_prefix; - std::string aad_file_unique; - bool supply_aad_prefix; - - _AesGcmV1__isset __isset; - - void __set_aad_prefix(const std::string& val); - - void __set_aad_file_unique(const std::string& val); - - void __set_supply_aad_prefix(const bool val); - - bool operator == (const AesGcmV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } - bool operator != (const AesGcmV1 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const AesGcmV1 & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(AesGcmV1 &a, AesGcmV1 &b); - -std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj); - -typedef struct _AesGcmCtrV1__isset { - _AesGcmCtrV1__isset() : aad_prefix(false), aad_file_unique(false), supply_aad_prefix(false) {} - bool aad_prefix :1; - bool aad_file_unique :1; - bool supply_aad_prefix :1; -} _AesGcmCtrV1__isset; - -class AesGcmCtrV1 : public virtual ::duckdb_apache::thrift::TBase { - public: - - AesGcmCtrV1(const AesGcmCtrV1&); - AesGcmCtrV1& operator=(const AesGcmCtrV1&); - AesGcmCtrV1() : aad_prefix(), aad_file_unique(), supply_aad_prefix(0) { - } - - virtual ~AesGcmCtrV1() throw(); - std::string aad_prefix; - std::string aad_file_unique; - bool supply_aad_prefix; - - _AesGcmCtrV1__isset __isset; - - void __set_aad_prefix(const std::string& val); - - void __set_aad_file_unique(const std::string& val); - - void __set_supply_aad_prefix(const bool val); - - bool operator == (const AesGcmCtrV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } - bool operator != (const AesGcmCtrV1 &rhs) const { - return !(*this == rhs); - } - - bool operator < (const AesGcmCtrV1 & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b); - -std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj); - -typedef struct _EncryptionAlgorithm__isset { - _EncryptionAlgorithm__isset() : AES_GCM_V1(false), AES_GCM_CTR_V1(false) {} - bool AES_GCM_V1 :1; - bool AES_GCM_CTR_V1 :1; -} _EncryptionAlgorithm__isset; - -class EncryptionAlgorithm : public virtual ::duckdb_apache::thrift::TBase { - public: - - EncryptionAlgorithm(const EncryptionAlgorithm&); - EncryptionAlgorithm& operator=(const EncryptionAlgorithm&); - EncryptionAlgorithm() { - } - - virtual ~EncryptionAlgorithm() throw(); - AesGcmV1 AES_GCM_V1; - AesGcmCtrV1 AES_GCM_CTR_V1; - - _EncryptionAlgorithm__isset __isset; - - void __set_AES_GCM_V1(const AesGcmV1& val); - - void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val); - - bool operator == (const EncryptionAlgorithm & rhs) const - { - if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) - return false; - else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) - return false; - if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) - return false; - else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) - return false; - return true; - } - bool operator != (const EncryptionAlgorithm &rhs) const { - return !(*this == rhs); - } - - bool operator < (const EncryptionAlgorithm & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b); - -std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj); - -typedef struct _FileMetaData__isset { - _FileMetaData__isset() : key_value_metadata(false), created_by(false), column_orders(false), encryption_algorithm(false), footer_signing_key_metadata(false) {} - bool key_value_metadata :1; - bool created_by :1; - bool column_orders :1; - bool encryption_algorithm :1; - bool footer_signing_key_metadata :1; -} _FileMetaData__isset; - -class FileMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - FileMetaData(const FileMetaData&); - FileMetaData& operator=(const FileMetaData&); - FileMetaData() : version(0), num_rows(0), created_by(), footer_signing_key_metadata() { - } - - virtual ~FileMetaData() throw(); - int32_t version; - duckdb::vector schema; - int64_t num_rows; - duckdb::vector row_groups; - duckdb::vector key_value_metadata; - std::string created_by; - duckdb::vector column_orders; - EncryptionAlgorithm encryption_algorithm; - std::string footer_signing_key_metadata; - - _FileMetaData__isset __isset; - - void __set_version(const int32_t val); - - void __set_schema(const duckdb::vector & val); - - void __set_num_rows(const int64_t val); - - void __set_row_groups(const duckdb::vector & val); - - void __set_key_value_metadata(const duckdb::vector & val); - - void __set_created_by(const std::string& val); - - void __set_column_orders(const duckdb::vector & val); - - void __set_encryption_algorithm(const EncryptionAlgorithm& val); - - void __set_footer_signing_key_metadata(const std::string& val); - - bool operator == (const FileMetaData & rhs) const - { - if (!(version == rhs.version)) - return false; - if (!(schema == rhs.schema)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(row_groups == rhs.row_groups)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (__isset.created_by != rhs.__isset.created_by) - return false; - else if (__isset.created_by && !(created_by == rhs.created_by)) - return false; - if (__isset.column_orders != rhs.__isset.column_orders) - return false; - else if (__isset.column_orders && !(column_orders == rhs.column_orders)) - return false; - if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) - return false; - else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata) - return false; - else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) - return false; - return true; - } - bool operator != (const FileMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const FileMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(FileMetaData &a, FileMetaData &b); - -std::ostream& operator<<(std::ostream& out, const FileMetaData& obj); - -typedef struct _FileCryptoMetaData__isset { - _FileCryptoMetaData__isset() : key_metadata(false) {} - bool key_metadata :1; -} _FileCryptoMetaData__isset; - -class FileCryptoMetaData : public virtual ::duckdb_apache::thrift::TBase { - public: - - FileCryptoMetaData(const FileCryptoMetaData&); - FileCryptoMetaData& operator=(const FileCryptoMetaData&); - FileCryptoMetaData() : key_metadata() { - } - - virtual ~FileCryptoMetaData() throw(); - EncryptionAlgorithm encryption_algorithm; - std::string key_metadata; - - _FileCryptoMetaData__isset __isset; - - void __set_encryption_algorithm(const EncryptionAlgorithm& val); - - void __set_key_metadata(const std::string& val); - - bool operator == (const FileCryptoMetaData & rhs) const - { - if (!(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } - bool operator != (const FileCryptoMetaData &rhs) const { - return !(*this == rhs); - } - - bool operator < (const FileCryptoMetaData & ) const; - - uint32_t read(::duckdb_apache::thrift::protocol::TProtocol* iprot); - uint32_t write(::duckdb_apache::thrift::protocol::TProtocol* oprot) const; - - virtual void printTo(std::ostream& out) const; -}; - -void swap(FileCryptoMetaData &a, FileCryptoMetaData &b); - -std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); - -}} // namespace - -#endif diff --git a/src/duckdb/third_party/parquet/windows_compatibility.h b/src/duckdb/third_party/parquet/windows_compatibility.h deleted file mode 100644 index 6cbe6009c..000000000 --- a/src/duckdb/third_party/parquet/windows_compatibility.h +++ /dev/null @@ -1,9 +0,0 @@ -#pragma once - -#ifdef _WIN32 -#undef CREATE_NEW -#undef OPTIONAL -#undef Realloc -#undef min -#undef max -#endif \ No newline at end of file diff --git a/src/duckdb/third_party/snappy/snappy-internal.h b/src/duckdb/third_party/snappy/snappy-internal.h deleted file mode 100644 index f039c5e16..000000000 --- a/src/duckdb/third_party/snappy/snappy-internal.h +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright 2008 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Internals shared between the Snappy implementation and its unittest. - -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ -#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ - -#include "snappy-stubs-internal.h" - -namespace duckdb_snappy { -namespace internal { - -// Working memory performs a single allocation to hold all scratch space -// required for compression. -class WorkingMemory { - public: - explicit WorkingMemory(size_t input_size); - ~WorkingMemory(); - - // Allocates and clears a hash table using memory in "*this", - // stores the number of buckets in "*table_size" and returns a pointer to - // the base of the hash table. - uint16* GetHashTable(size_t fragment_size, int* table_size) const; - char* GetScratchInput() const { return input_; } - char* GetScratchOutput() const { return output_; } - - private: - char* mem_; // the allocated memory, never nullptr - size_t size_; // the size of the allocated memory, never 0 - uint16* table_; // the pointer to the hashtable - char* input_; // the pointer to the input scratch buffer - char* output_; // the pointer to the output scratch buffer - - // No copying - WorkingMemory(const WorkingMemory&); - void operator=(const WorkingMemory&); -}; - -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input_length <= kBlockSize" -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input_length)" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -char* CompressFragment(const char* input, - size_t input_length, - char* op, - uint16* table, - const int table_size); - -// Find the largest n such that -// -// s1[0,n-1] == s2[0,n-1] -// and n <= (s2_limit - s2). -// -// Return make_pair(n, n < 8). -// Does not read *s2_limit or beyond. -// Does not read *(s1 + (s2_limit - s2)) or beyond. -// Requires that s2_limit >= s2. -// -// Separate implementation for 64-bit, little-endian cpus. -#if !defined(SNAPPY_IS_BIG_ENDIAN) && \ - (defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM)) -static inline std::pair FindMatchLength(const char* s1, - const char* s2, - const char* s2_limit) { - assert(s2_limit >= s2); - size_t matched = 0; - - // This block isn't necessary for correctness; we could just start looping - // immediately. As an optimization though, it is useful. It creates some not - // uncommon code paths that determine, without extra effort, whether the match - // length is less than 8. In short, we are hoping to avoid a conditional - // branch, and perhaps get better code layout from the C++ compiler. - if (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { - uint64 a1 = UNALIGNED_LOAD64(s1); - uint64 a2 = UNALIGNED_LOAD64(s2); - if (a1 != a2) { - return std::pair(Bits::FindLSBSetNonZero64(a1 ^ a2) >> 3, - true); - } else { - matched = 8; - s2 += 8; - } - } - - // Find out how long the match is. We loop over the data 64 bits at a - // time until we find a 64-bit block that doesn't match; then we find - // the first non-matching bit and use that to calculate the total - // length of the match. - while (SNAPPY_PREDICT_TRUE(s2 <= s2_limit - 8)) { - if (UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) { - s2 += 8; - matched += 8; - } else { - uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero64(x); - matched += matching_bits >> 3; - assert(matched >= 8); - return std::pair(matched, false); - } - } - while (SNAPPY_PREDICT_TRUE(s2 < s2_limit)) { - if (s1[matched] == *s2) { - ++s2; - ++matched; - } else { - return std::pair(matched, matched < 8); - } - } - return std::pair(matched, matched < 8); -} -#else -static inline std::pair FindMatchLength(const char* s1, - const char* s2, - const char* s2_limit) { - // Implementation based on the x86-64 version, above. - assert(s2_limit >= s2); - int matched = 0; - - while (s2 <= s2_limit - 4 && - UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { - s2 += 4; - matched += 4; - } - if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { - uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); - int matching_bits = Bits::FindLSBSetNonZero(x); - matched += matching_bits >> 3; - } else { - while ((s2 < s2_limit) && (s1[matched] == *s2)) { - ++s2; - ++matched; - } - } - return std::pair(matched, matched < 8); -} -#endif - -// Lookup tables for decompression code. Give --snappy_dump_decompression_table -// to the unit test to recompute char_table. - -enum { - LITERAL = 0, - COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode - COPY_2_BYTE_OFFSET = 2, - COPY_4_BYTE_OFFSET = 3 -}; -static const int kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the actual offset. - -// Data stored per entry in lookup table: -// Range Bits-used Description -// ------------------------------------ -// 1..64 0..7 Literal/copy length encoded in opcode byte -// 0..7 8..10 Copy offset encoded in opcode byte / 256 -// 0..4 11..13 Extra bytes after opcode -// -// We use eight bits for the length even though 7 would have sufficed -// because of efficiency reasons: -// (1) Extracting a byte is faster than a bit-field -// (2) It properly aligns copy offset so we do not need a <<8 -static const uint16 char_table[256] = { - 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, - 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, - 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, - 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, - 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, - 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, - 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, - 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, - 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, - 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, - 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, - 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, - 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, - 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, - 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, - 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, - 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, - 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, - 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, - 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, - 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, - 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, - 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, - 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, - 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, - 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, - 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, - 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, - 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, - 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, - 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, - 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 -}; - -} // end namespace internal - - -// The size of a compression block. Note that many parts of the compression -// code assumes that kBlockSize <= 65536; in particular, the hash table -// can only store 16-bit offsets, and EmitCopy() also assumes the offset -// is 65535 bytes or less. Note also that if you change this, it will -// affect the framing format (see framing_format.txt). -// -// Note that there might be older data around that is compressed with larger -// block sizes, so the decompression code should not rely on the -// non-existence of long backreferences. -static const int kBlockLog = 16; -static const size_t kBlockSize = 1 << kBlockLog; - -static const int kMaxHashTableBits = 14; -static const size_t kMaxHashTableSize = 1 << kMaxHashTableBits; - - -} // end namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_ diff --git a/src/duckdb/third_party/snappy/snappy-sinksource.cc b/src/duckdb/third_party/snappy/snappy-sinksource.cc deleted file mode 100644 index d18c62762..000000000 --- a/src/duckdb/third_party/snappy/snappy-sinksource.cc +++ /dev/null @@ -1,104 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include "snappy-sinksource.h" - -namespace duckdb_snappy { - -Source::~Source() { } - -Sink::~Sink() { } - -char* Sink::GetAppendBuffer(size_t length, char* scratch) { - return scratch; -} - -char* Sink::GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size) { - *allocated_size = scratch_size; - return scratch; -} - -void Sink::AppendAndTakeOwnership( - char* bytes, size_t n, - void (*deleter)(void*, const char*, size_t), - void *deleter_arg) { - Append(bytes, n); - (*deleter)(deleter_arg, bytes, n); -} - -ByteArraySource::~ByteArraySource() { } - -size_t ByteArraySource::Available() const { return left_; } - -const char* ByteArraySource::Peek(size_t* len) { - *len = left_; - return ptr_; -} - -void ByteArraySource::Skip(size_t n) { - left_ -= n; - ptr_ += n; -} - -UncheckedByteArraySink::~UncheckedByteArraySink() { } - -void UncheckedByteArraySink::Append(const char* data, size_t n) { - // Do no copying if the caller filled in the result of GetAppendBuffer() - if (data != dest_) { - memcpy(dest_, data, n); - } - dest_ += n; -} - -char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { - return dest_; -} - -void UncheckedByteArraySink::AppendAndTakeOwnership( - char* data, size_t n, - void (*deleter)(void*, const char*, size_t), - void *deleter_arg) { - if (data != dest_) { - memcpy(dest_, data, n); - (*deleter)(deleter_arg, data, n); - } - dest_ += n; -} - -char* UncheckedByteArraySink::GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size) { - *allocated_size = desired_size_hint; - return dest_; -} - -} // namespace duckdb_snappy diff --git a/src/duckdb/third_party/snappy/snappy-sinksource.h b/src/duckdb/third_party/snappy/snappy-sinksource.h deleted file mode 100644 index ec2c451b0..000000000 --- a/src/duckdb/third_party/snappy/snappy-sinksource.h +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ -#define THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ - -#include - -namespace duckdb_snappy { - -// A Sink is an interface that consumes a sequence of bytes. -class Sink { - public: - Sink() { } - virtual ~Sink(); - - // Append "bytes[0,n-1]" to this. - virtual void Append(const char* bytes, size_t n) = 0; - - // Returns a writable buffer of the specified length for appending. - // May return a pointer to the caller-owned scratch buffer which - // must have at least the indicated length. The returned buffer is - // only valid until the next operation on this Sink. - // - // After writing at most "length" bytes, call Append() with the - // pointer returned from this function and the number of bytes - // written. Many Append() implementations will avoid copying - // bytes if this function returned an internal buffer. - // - // If a non-scratch buffer is returned, the caller may only pass a - // prefix of it to Append(). That is, it is not correct to pass an - // interior pointer of the returned array to Append(). - // - // The default implementation always returns the scratch buffer. - virtual char* GetAppendBuffer(size_t length, char* scratch); - - // For higher performance, Sink implementations can provide custom - // AppendAndTakeOwnership() and GetAppendBufferVariable() methods. - // These methods can reduce the number of copies done during - // compression/decompression. - - // Append "bytes[0,n-1] to the sink. Takes ownership of "bytes" - // and calls the deleter function as (*deleter)(deleter_arg, bytes, n) - // to free the buffer. deleter function must be non NULL. - // - // The default implementation just calls Append and frees "bytes". - // Other implementations may avoid a copy while appending the buffer. - virtual void AppendAndTakeOwnership( - char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); - - // Returns a writable buffer for appending and writes the buffer's capacity to - // *allocated_size. Guarantees *allocated_size >= min_size. - // May return a pointer to the caller-owned scratch buffer which must have - // scratch_size >= min_size. - // - // The returned buffer is only valid until the next operation - // on this ByteSink. - // - // After writing at most *allocated_size bytes, call Append() with the - // pointer returned from this function and the number of bytes written. - // Many Append() implementations will avoid copying bytes if this function - // returned an internal buffer. - // - // If the sink implementation allocates or reallocates an internal buffer, - // it should use the desired_size_hint if appropriate. If a caller cannot - // provide a reasonable guess at the desired capacity, it should set - // desired_size_hint = 0. - // - // If a non-scratch buffer is returned, the caller may only pass - // a prefix to it to Append(). That is, it is not correct to pass an - // interior pointer to Append(). - // - // The default implementation always returns the scratch buffer. - virtual char* GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - - private: - // No copying - Sink(const Sink&); - void operator=(const Sink&); -}; - -// A Source is an interface that yields a sequence of bytes -class Source { - public: - Source() { } - virtual ~Source(); - - // Return the number of bytes left to read from the source - virtual size_t Available() const = 0; - - // Peek at the next flat region of the source. Does not reposition - // the source. The returned region is empty iff Available()==0. - // - // Returns a pointer to the beginning of the region and store its - // length in *len. - // - // The returned region is valid until the next call to Skip() or - // until this object is destroyed, whichever occurs first. - // - // The returned region may be larger than Available() (for example - // if this ByteSource is a view on a substring of a larger source). - // The caller is responsible for ensuring that it only reads the - // Available() bytes. - virtual const char* Peek(size_t* len) = 0; - - // Skip the next n bytes. Invalidates any buffer returned by - // a previous call to Peek(). - // REQUIRES: Available() >= n - virtual void Skip(size_t n) = 0; - - private: - // No copying - Source(const Source&); - void operator=(const Source&); -}; - -// A Source implementation that yields the contents of a flat array -class ByteArraySource : public Source { - public: - ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } - virtual ~ByteArraySource(); - virtual size_t Available() const; - virtual const char* Peek(size_t* len); - virtual void Skip(size_t n); - private: - const char* ptr_; - size_t left_; -}; - -// A Sink implementation that writes to a flat array without any bound checks. -class UncheckedByteArraySink : public Sink { - public: - explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } - virtual ~UncheckedByteArraySink(); - virtual void Append(const char* data, size_t n); - virtual char* GetAppendBuffer(size_t len, char* scratch); - virtual char* GetAppendBufferVariable( - size_t min_size, size_t desired_size_hint, char* scratch, - size_t scratch_size, size_t* allocated_size); - virtual void AppendAndTakeOwnership( - char* bytes, size_t n, void (*deleter)(void*, const char*, size_t), - void *deleter_arg); - - // Return the current output pointer so that a caller can see how - // many bytes were produced. - // Note: this is not a Sink method. - char* CurrentDestination() const { return dest_; } - private: - char* dest_; -}; - -} // namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_SNAPPY_SINKSOURCE_H_ diff --git a/src/duckdb/third_party/snappy/snappy-stubs-internal.h b/src/duckdb/third_party/snappy/snappy-stubs-internal.h deleted file mode 100644 index c53adddeb..000000000 --- a/src/duckdb/third_party/snappy/snappy-stubs-internal.h +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various stubs for the open-source version of Snappy. - -#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ -#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ - -// #ifdef HAVE_CONFIG_H -// #include "config.h" -// #endif - -#include - -#include -#include -#include - -#ifdef HAVE_SYS_MMAN_H -#include -#endif - -#ifdef HAVE_UNISTD_H -#include -#endif - -#if defined(_MSC_VER) -#include -#endif // defined(_MSC_VER) - -#ifndef __has_feature -#define __has_feature(x) 0 -#endif - -#if __has_feature(memory_sanitizer) -#include -#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) \ - __msan_unpoison((address), (size)) -#else -#define SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(address, size) /* empty */ -#endif // __has_feature(memory_sanitizer) - -#include "snappy-stubs-public.h" - -#if defined(__x86_64__) - -// Enable 64-bit optimized versions of some routines. -#define ARCH_K8 1 - -#elif defined(__ppc64__) - -#define ARCH_PPC 1 - -#elif defined(__aarch64__) - -#define ARCH_ARM 1 - -#endif - -// Needed by OS X, among others. -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -// The size of an array, if known at compile-time. -// Will give unexpected results if used on a pointer. -// We undefine it first, since some compilers already have a definition. -#ifdef ARRAYSIZE -#undef ARRAYSIZE -#endif -#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) - -// Static prediction hints. -#ifdef HAVE_BUILTIN_EXPECT -#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -#else -#define SNAPPY_PREDICT_FALSE(x) x -#define SNAPPY_PREDICT_TRUE(x) x -#endif - -// This is only used for recomputing the tag byte table used during -// decompression; for simplicity we just remove it from the open-source -// version (anyone who wants to regenerate it can just do the call -// themselves within main()). -#define DEFINE_bool(flag_name, default_value, description) \ - bool FLAGS_ ## flag_name = default_value -#define DECLARE_bool(flag_name) \ - extern bool FLAGS_ ## flag_name - -namespace duckdb_snappy { - -//static const uint32 kuint32max = static_cast(0xFFFFFFFF); -//static const int64 kint64max = static_cast(0x7FFFFFFFFFFFFFFFLL); - - -// HM: Always use aligned load to keep ourselves out of trouble. Sorry. - -inline uint16 UNALIGNED_LOAD16(const void *p) { - uint16 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint32 UNALIGNED_LOAD32(const void *p) { - uint32 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline uint64 UNALIGNED_LOAD64(const void *p) { - uint64 t; - memcpy(&t, p, sizeof t); - return t; -} - -inline void UNALIGNED_STORE16(void *p, uint16 v) { - memcpy(p, &v, sizeof v); -} - -inline void UNALIGNED_STORE32(void *p, uint32 v) { - memcpy(p, &v, sizeof v); -} - -inline void UNALIGNED_STORE64(void *p, uint64 v) { - memcpy(p, &v, sizeof v); -} - - -// The following guarantees declaration of the byte swap functions. -#if defined(SNAPPY_IS_BIG_ENDIAN) - -#ifdef HAVE_SYS_BYTEORDER_H -#include -#endif - -#ifdef HAVE_SYS_ENDIAN_H -#include -#endif - -#ifdef _MSC_VER -#include -#define bswap_16(x) _byteswap_ushort(x) -#define bswap_32(x) _byteswap_ulong(x) -#define bswap_64(x) _byteswap_uint64(x) - -#elif defined(__APPLE__) -// Mac OS X / Darwin features -#include -#define bswap_16(x) OSSwapInt16(x) -#define bswap_32(x) OSSwapInt32(x) -#define bswap_64(x) OSSwapInt64(x) - -#elif defined(HAVE_BYTESWAP_H) -#include - -#elif defined(bswap32) -// FreeBSD defines bswap{16,32,64} in (already #included). -#define bswap_16(x) bswap16(x) -#define bswap_32(x) bswap32(x) -#define bswap_64(x) bswap64(x) - -#elif defined(BSWAP_64) -// Solaris 10 defines BSWAP_{16,32,64} in (already #included). -#define bswap_16(x) BSWAP_16(x) -#define bswap_32(x) BSWAP_32(x) -#define bswap_64(x) BSWAP_64(x) - -#else - -inline uint16 bswap_16(uint16 x) { - return (x << 8) | (x >> 8); -} - -inline uint32 bswap_32(uint32 x) { - x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8); - return (x >> 16) | (x << 16); -} - -inline uint64 bswap_64(uint64 x) { - x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8); - x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16); - return (x >> 32) | (x << 32); -} - -#endif - -#endif // defined(SNAPPY_IS_BIG_ENDIAN) - -// Convert to little-endian storage, opposite of network format. -// Convert x from host to little endian: x = LittleEndian.FromHost(x); -// convert x from little endian to host: x = LittleEndian.ToHost(x); -// -// Store values into unaligned memory converting to little endian order: -// LittleEndian.Store16(p, x); -// -// Load unaligned values stored in little endian converting to host order: -// x = LittleEndian.Load16(p); -class LittleEndian { - public: - // Conversion functions. -#if defined(SNAPPY_IS_BIG_ENDIAN) - - static uint16 FromHost16(uint16 x) { return bswap_16(x); } - static uint16 ToHost16(uint16 x) { return bswap_16(x); } - - static uint32 FromHost32(uint32 x) { return bswap_32(x); } - static uint32 ToHost32(uint32 x) { return bswap_32(x); } - - static bool IsLittleEndian() { return false; } - -#else // !defined(SNAPPY_IS_BIG_ENDIAN) - - static uint16 FromHost16(uint16 x) { return x; } - static uint16 ToHost16(uint16 x) { return x; } - - static uint32 FromHost32(uint32 x) { return x; } - static uint32 ToHost32(uint32 x) { return x; } - - static bool IsLittleEndian() { return true; } - -#endif // !defined(SNAPPY_IS_BIG_ENDIAN) - - // Functions to do unaligned loads and stores in little-endian order. - static uint16 Load16(const void *p) { - return ToHost16(UNALIGNED_LOAD16(p)); - } - - static void Store16(void *p, uint16 v) { - UNALIGNED_STORE16(p, FromHost16(v)); - } - - static uint32 Load32(const void *p) { - return ToHost32(UNALIGNED_LOAD32(p)); - } - - static void Store32(void *p, uint32 v) { - UNALIGNED_STORE32(p, FromHost32(v)); - } -}; - -// Some bit-manipulation functions. -class Bits { - public: - // Return floor(log2(n)) for positive integer n. - static int Log2FloorNonZero(uint32 n); - - // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. - static int Log2Floor(uint32 n); - - // Return the first set least / most significant bit, 0-indexed. Returns an - // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except - // that it's 0-indexed. - static int FindLSBSetNonZero(uint32 n); - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - static int FindLSBSetNonZero64(uint64 n); -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - - private: - // No copying - Bits(const Bits&); - void operator=(const Bits&); -}; - -#ifdef HAVE_BUILTIN_CTZ - -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - // (31 ^ x) is equivalent to (31 - x) for x in [0, 31]. An easy proof - // represents subtraction in base 2 and observes that there's no carry. - // - // GCC and Clang represent __builtin_clz on x86 as 31 ^ _bit_scan_reverse(x). - // Using "31 ^" here instead of "31 -" allows the optimizer to strip the - // function body down to _bit_scan_reverse(x). - return 31 ^ __builtin_clz(n); -} - -inline int Bits::Log2Floor(uint32 n) { - return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - return __builtin_ctz(n); -} - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - return __builtin_ctzll(n); -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#elif defined(_MSC_VER) - -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - unsigned long where; - _BitScanReverse(&where, n); - return static_cast(where); -} - -inline int Bits::Log2Floor(uint32 n) { - unsigned long where; - if (_BitScanReverse(&where, n)) - return static_cast(where); - return -1; -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - unsigned long where; - if (_BitScanForward(&where, n)) - return static_cast(where); - return 32; -} - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - unsigned long where; - if (_BitScanForward64(&where, n)) - return static_cast(where); - return 64; -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#else // Portable versions. - -inline int Bits::Log2FloorNonZero(uint32 n) { - assert(n != 0); - - int log = 0; - uint32 value = n; - for (int i = 4; i >= 0; --i) { - int shift = (1 << i); - uint32 x = value >> shift; - if (x != 0) { - value = x; - log += shift; - } - } - assert(value == 1); - return log; -} - -inline int Bits::Log2Floor(uint32 n) { - return (n == 0) ? -1 : Bits::Log2FloorNonZero(n); -} - -inline int Bits::FindLSBSetNonZero(uint32 n) { - assert(n != 0); - - int rc = 31; - for (int i = 4, shift = 1 << 4; i >= 0; --i) { - const uint32 x = n << shift; - if (x != 0) { - n = x; - rc -= shift; - } - shift >>= 1; - } - return rc; -} - -#if defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) -// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero(). -inline int Bits::FindLSBSetNonZero64(uint64 n) { - assert(n != 0); - - const uint32 bottombits = static_cast(n); - if (bottombits == 0) { - // Bottom bits are zero, so scan in top bits - return 32 + FindLSBSetNonZero(static_cast(n >> 32)); - } else { - return FindLSBSetNonZero(bottombits); - } -} -#endif // defined(ARCH_K8) || defined(ARCH_PPC) || defined(ARCH_ARM) - -#endif // End portable versions. - -// Variable-length integer encoding. -class Varint { - public: - // Maximum lengths of varint encoding of uint32. - static const int kMax32 = 5; - - // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1]. - // Never reads a character at or beyond limit. If a valid/terminated varint32 - // was found in the range, stores it in *OUTPUT and returns a pointer just - // past the last byte of the varint32. Else returns NULL. On success, - // "result <= limit". - static const char* Parse32WithLimit(const char* ptr, const char* limit, - uint32* OUTPUT); - - // REQUIRES "ptr" points to a buffer of length sufficient to hold "v". - // EFFECTS Encodes "v" into "ptr" and returns a pointer to the - // byte just past the last encoded byte. - static char* Encode32(char* ptr, uint32 v); - - // EFFECTS Appends the varint representation of "value" to "*s". - static void Append32(string* s, uint32 value); -}; - -inline const char* Varint::Parse32WithLimit(const char* p, - const char* l, - uint32* OUTPUT) { - const unsigned char* ptr = reinterpret_cast(p); - const unsigned char* limit = reinterpret_cast(l); - uint32 b, result; - if (ptr >= limit) return NULL; - b = *(ptr++); result = b & 127; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; - if (ptr >= limit) return NULL; - b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; - return NULL; // Value is too long to be a varint32 - done: - *OUTPUT = result; - return reinterpret_cast(ptr); -} - -inline char* Varint::Encode32(char* sptr, uint32 v) { - // Operate on characters as unsigneds - unsigned char* ptr = reinterpret_cast(sptr); - static const int B = 128; - if (v < (1<<7)) { - *(ptr++) = v; - } else if (v < (1<<14)) { - *(ptr++) = v | B; - *(ptr++) = v>>7; - } else if (v < (1<<21)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = v>>14; - } else if (v < (1<<28)) { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = v>>21; - } else { - *(ptr++) = v | B; - *(ptr++) = (v>>7) | B; - *(ptr++) = (v>>14) | B; - *(ptr++) = (v>>21) | B; - *(ptr++) = v>>28; - } - return reinterpret_cast(ptr); -} - -// If you know the internal layout of the std::string in use, you can -// replace this function with one that resizes the string without -// filling the new space with zeros (if applicable) -- -// it will be non-portable but faster. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { - s->resize(new_size); -} - -// Return a mutable char* pointing to a string's internal buffer, -// which may not be null-terminated. Writing through this pointer will -// modify the string. -// -// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the -// next call to a string method that invalidates iterators. -// -// As of 2006-04, there is no standard-blessed way of getting a -// mutable reference to a string's internal buffer. However, issue 530 -// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) -// proposes this as the method. It will officially be part of the standard -// for C++0x. This should already work on all current implementations. -inline char* string_as_array(string* str) { - return str->empty() ? NULL : &*str->begin(); -} - -} // namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ diff --git a/src/duckdb/third_party/snappy/snappy-stubs-public.h b/src/duckdb/third_party/snappy/snappy-stubs-public.h deleted file mode 100644 index 5500e054d..000000000 --- a/src/duckdb/third_party/snappy/snappy-stubs-public.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Various type stubs for the open-source version of Snappy. -// -// This file cannot include config.h, as it is included from snappy.h, -// which is a public header. Instead, snappy-stubs-public.h is generated by -// from snappy-stubs-public.h.in at configure time. - -#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ -#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ - -#include -#include -#include - -#ifndef _WIN32 // HAVE_SYS_UIO_H -#include -#endif // HAVE_SYS_UIO_H - -#define SNAPPY_MAJOR 1 -#define SNAPPY_MINOR 1 -#define SNAPPY_PATCHLEVEL 7 -#define SNAPPY_VERSION \ - ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) - -namespace duckdb_snappy { - -using int8 = std::int8_t; -using uint8 = std::uint8_t; -using int16 = std::int16_t; -using uint16 = std::uint16_t; -using int32 = std::int32_t; -using uint32 = std::uint32_t; -using int64 = std::int64_t; -using uint64 = std::uint64_t; - -using string = std::string; - -#ifdef _WIN32 // !HAVE_SYS_UIO_H -// Windows does not have an iovec type, yet the concept is universally useful. -// It is simple to define it ourselves, so we put it inside our own namespace. -struct iovec { - void* iov_base; - size_t iov_len; -}; -#endif // !HAVE_SYS_UIO_H - -} // namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ diff --git a/src/duckdb/third_party/snappy/snappy.cc b/src/duckdb/third_party/snappy/snappy.cc deleted file mode 100644 index 76a89de41..000000000 --- a/src/duckdb/third_party/snappy/snappy.cc +++ /dev/null @@ -1,1662 +0,0 @@ -// Copyright 2005 Google Inc. All Rights Reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "snappy.h" -#include "snappy-internal.h" -#include "snappy-sinksource.h" - -#if !defined(SNAPPY_HAVE_SSSE3) -// __SSSE3__ is defined by GCC and Clang. Visual Studio doesn't target SIMD -// support between SSE2 and AVX (so SSSE3 instructions require AVX support), and -// defines __AVX__ when AVX support is available. -#if defined(__SSSE3__) || defined(__AVX__) -#define SNAPPY_HAVE_SSSE3 1 -#else -#define SNAPPY_HAVE_SSSE3 0 -#endif -#endif // !defined(SNAPPY_HAVE_SSSE3) - -#if !defined(SNAPPY_HAVE_BMI2) -// __BMI2__ is defined by GCC and Clang. Visual Studio doesn't target BMI2 -// specifically, but it does define __AVX2__ when AVX2 support is available. -// Fortunately, AVX2 was introduced in Haswell, just like BMI2. -// -// BMI2 is not defined as a subset of AVX2 (unlike SSSE3 and AVX above). So, -// GCC and Clang can build code with AVX2 enabled but BMI2 disabled, in which -// case issuing BMI2 instructions results in a compiler error. -#if defined(__BMI2__) || (defined(_MSC_VER) && defined(__AVX2__)) -#define SNAPPY_HAVE_BMI2 1 -#else -#define SNAPPY_HAVE_BMI2 0 -#endif -#endif // !defined(SNAPPY_HAVE_BMI2) - -#if SNAPPY_HAVE_SSSE3 -// Please do not replace with . or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include -#endif - -#if SNAPPY_HAVE_BMI2 -// Please do not replace with . or with headers that assume more -// advanced SSE versions without checking with all the OWNERS. -#include -#endif - -#include - -#include -#include -#include - -namespace duckdb_snappy { - -using internal::COPY_1_BYTE_OFFSET; -using internal::COPY_2_BYTE_OFFSET; -using internal::LITERAL; -using internal::char_table; -using internal::kMaximumTagLength; - -// Any hash function will produce a valid compressed bitstream, but a good -// hash function reduces the number of collisions and thus yields better -// compression for compressible input, and more speed for incompressible -// input. Of course, it doesn't hurt if the hash function is reasonably fast -// either, as it gets called a lot. -static inline uint32 HashBytes(uint32 bytes, int shift) { - uint32 kMul = 0x1e35a7bd; - return (bytes * kMul) >> shift; -} -static inline uint32 Hash(const char* p, int shift) { - return HashBytes(UNALIGNED_LOAD32(p), shift); -} - -size_t MaxCompressedLength(size_t source_len) { - // Compressed data can be defined as: - // compressed := item* literal* - // item := literal* copy - // - // The trailing literal sequence has a space blowup of at most 62/60 - // since a literal of length 60 needs one tag byte + one extra byte - // for length information. - // - // Item blowup is trickier to measure. Suppose the "copy" op copies - // 4 bytes of data. Because of a special check in the encoding code, - // we produce a 4-byte copy only if the offset is < 65536. Therefore - // the copy op takes 3 bytes to encode, and this type of item leads - // to at most the 62/60 blowup for representing literals. - // - // Suppose the "copy" op copies 5 bytes of data. If the offset is big - // enough, it will take 5 bytes to encode the copy op. Therefore the - // worst case here is a one-byte literal followed by a five-byte copy. - // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. - // - // This last factor dominates the blowup, so the final estimate is: - return 32 + source_len + source_len/6; -} - -namespace { - -void UnalignedCopy64(const void* src, void* dst) { - char tmp[8]; - memcpy(tmp, src, 8); - memcpy(dst, tmp, 8); -} - -void UnalignedCopy128(const void* src, void* dst) { - // memcpy gets vectorized when the appropriate compiler options are used. - // For example, x86 compilers targeting SSE2+ will optimize to an SSE2 load - // and store. - char tmp[16]; - memcpy(tmp, src, 16); - memcpy(dst, tmp, 16); -} - -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) a byte at a time. Used -// for handling COPY operations where the input and output regions may overlap. -// For example, suppose: -// src == "ab" -// op == src + 2 -// op_limit == op + 20 -// After IncrementalCopySlow(src, op, op_limit), the result will have eleven -// copies of "ab" -// ababababababababababab -// Note that this does not match the semantics of either memcpy() or memmove(). -inline char* IncrementalCopySlow(const char* src, char* op, - char* const op_limit) { - // TODO: Remove pragma when LLVM is aware this - // function is only called in cold regions and when cold regions don't get - // vectorized or unrolled. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - while (op < op_limit) { - *op++ = *src++; - } - return op_limit; -} - -#if SNAPPY_HAVE_SSSE3 - -// This is a table of shuffle control masks that can be used as the source -// operand for PSHUFB to permute the contents of the destination XMM register -// into a repeating byte pattern. -alignas(16) const char pshufb_fill_patterns[7][16] = { - {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1}, - {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0}, - {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}, - {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0}, - {0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3}, - {0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1}, -}; - -#endif // SNAPPY_HAVE_SSSE3 - -// Copy [src, src+(op_limit-op)) to [op, (op_limit-op)) but faster than -// IncrementalCopySlow. buf_limit is the address past the end of the writable -// region of the buffer. -inline char* IncrementalCopy(const char* src, char* op, char* const op_limit, - char* const buf_limit) { - // Terminology: - // - // slop = buf_limit - op - // pat = op - src - // len = limit - op - assert(src < op); - assert(op <= op_limit); - assert(op_limit <= buf_limit); - // NOTE: The compressor always emits 4 <= len <= 64. It is ok to assume that - // to optimize this function but we have to also handle other cases in case - // the input does not satisfy these conditions. - - size_t pattern_size = op - src; - // The cases are split into different branches to allow the branch predictor, - // FDO, and static prediction hints to work better. For each input we list the - // ratio of invocations that match each condition. - // - // input slop < 16 pat < 8 len > 16 - // ------------------------------------------ - // html|html4|cp 0% 1.01% 27.73% - // urls 0% 0.88% 14.79% - // jpg 0% 64.29% 7.14% - // pdf 0% 2.56% 58.06% - // txt[1-4] 0% 0.23% 0.97% - // pb 0% 0.96% 13.88% - // bin 0.01% 22.27% 41.17% - // - // It is very rare that we don't have enough slop for doing block copies. It - // is also rare that we need to expand a pattern. Small patterns are common - // for incompressible formats and for those we are plenty fast already. - // Lengths are normally not greater than 16 but they vary depending on the - // input. In general if we always predict len <= 16 it would be an ok - // prediction. - // - // In order to be fast we want a pattern >= 8 bytes and an unrolled loop - // copying 2x 8 bytes at a time. - - // Handle the uncommon case where pattern is less than 8 bytes. - if (SNAPPY_PREDICT_FALSE(pattern_size < 8)) { -#if SNAPPY_HAVE_SSSE3 - // Load the first eight bytes into an 128-bit XMM register, then use PSHUFB - // to permute the register's contents in-place into a repeating sequence of - // the first "pattern_size" bytes. - // For example, suppose: - // src == "abc" - // op == op + 3 - // After _mm_shuffle_epi8(), "pattern" will have five copies of "abc" - // followed by one byte of slop: abcabcabcabcabca. - // - // The non-SSE fallback implementation suffers from store-forwarding stalls - // because its loads and stores partly overlap. By expanding the pattern - // in-place, we avoid the penalty. - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 16)) { - const __m128i shuffle_mask = _mm_load_si128( - reinterpret_cast(pshufb_fill_patterns) - + pattern_size - 1); - const __m128i pattern = _mm_shuffle_epi8( - _mm_loadl_epi64(reinterpret_cast(src)), shuffle_mask); - // Uninitialized bytes are masked out by the shuffle mask. - // TODO: remove annotation and macro defs once MSan is fixed. - SNAPPY_ANNOTATE_MEMORY_IS_INITIALIZED(&pattern, sizeof(pattern)); - pattern_size *= 16 / pattern_size; - char* op_end = std::min(op_limit, buf_limit - 15); - while (op < op_end) { - _mm_storeu_si128(reinterpret_cast<__m128i*>(op), pattern); - op += pattern_size; - } - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; - } - return IncrementalCopySlow(src, op, op_limit); -#else // !SNAPPY_HAVE_SSSE3 - // If plenty of buffer space remains, expand the pattern to at least 8 - // bytes. The way the following loop is written, we need 8 bytes of buffer - // space if pattern_size >= 4, 11 bytes if pattern_size is 1 or 3, and 10 - // bytes if pattern_size is 2. Precisely encoding that is probably not - // worthwhile; instead, invoke the slow path if we cannot write 11 bytes - // (because 11 are required in the worst case). - if (SNAPPY_PREDICT_TRUE(op <= buf_limit - 11)) { - while (pattern_size < 8) { - UnalignedCopy64(src, op); - op += pattern_size; - pattern_size *= 2; - } - if (SNAPPY_PREDICT_TRUE(op >= op_limit)) return op_limit; - } else { - return IncrementalCopySlow(src, op, op_limit); - } -#endif // SNAPPY_HAVE_SSSE3 - } - assert(pattern_size >= 8); - - // Copy 2x 8 bytes at a time. Because op - src can be < 16, a single - // UnalignedCopy128 might overwrite data in op. UnalignedCopy64 is safe - // because expanding the pattern to at least 8 bytes guarantees that - // op - src >= 8. - // - // Typically, the op_limit is the gating factor so try to simplify the loop - // based on that. - if (SNAPPY_PREDICT_TRUE(op_limit <= buf_limit - 16)) { - // Factor the displacement from op to the source into a variable. This helps - // simplify the loop below by only varying the op pointer which we need to - // test for the end. Note that this was done after carefully examining the - // generated code to allow the addressing modes in the loop below to - // maximize micro-op fusion where possible on modern Intel processors. The - // generated code should be checked carefully for new processors or with - // major changes to the compiler. - // TODO: Simplify this code when the compiler reliably produces - // the correct x86 instruction sequence. - ptrdiff_t op_to_src = src - op; - - // The trip count of this loop is not large and so unrolling will only hurt - // code size without helping performance. - // - // TODO: Replace with loop trip count hint. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - do { - UnalignedCopy64(op + op_to_src, op); - UnalignedCopy64(op + op_to_src + 8, op + 8); - op += 16; - } while (op < op_limit); - return op_limit; - } - - // Fall back to doing as much as we can with the available slop in the - // buffer. This code path is relatively cold however so we save code size by - // avoiding unrolling and vectorizing. - // - // TODO: Remove pragma when when cold regions don't get vectorized - // or unrolled. -#ifdef __clang__ -#pragma clang loop unroll(disable) -#endif - for (char *op_end = buf_limit - 16; op < op_end; op += 16, src += 16) { - UnalignedCopy64(src, op); - UnalignedCopy64(src + 8, op + 8); - } - if (op >= op_limit) - return op_limit; - - // We only take this branch if we didn't have enough slop and we can do a - // single 8 byte copy. - if (SNAPPY_PREDICT_FALSE(op <= buf_limit - 8)) { - UnalignedCopy64(src, op); - src += 8; - op += 8; - } - return IncrementalCopySlow(src, op, op_limit); -} - -} // namespace - -template -static inline char* EmitLiteral(char* op, - const char* literal, - int len) { - // The vast majority of copies are below 16 bytes, for which a - // call to memcpy is overkill. This fast path can sometimes - // copy up to 15 bytes too much, but that is okay in the - // main loop, since we have a bit to go on for both sides: - // - // - The input will always have kInputMarginBytes = 15 extra - // available bytes, as long as we're in the main loop, and - // if not, allow_fast_path = false. - // - The output will always have 32 spare bytes (see - // MaxCompressedLength). - assert(len > 0); // Zero-length literals are disallowed - int n = len - 1; - if (allow_fast_path && len <= 16) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); - - UnalignedCopy128(literal, op); - return op + len; - } - - if (n < 60) { - // Fits in tag byte - *op++ = LITERAL | (n << 2); - } else { - int count = (Bits::Log2Floor(n) >> 3) + 1; - assert(count >= 1); - assert(count <= 4); - *op++ = LITERAL | ((59 + count) << 2); - // Encode in upcoming bytes. - // Write 4 bytes, though we may care about only 1 of them. The output buffer - // is guaranteed to have at least 3 more spaces left as 'len >= 61' holds - // here and there is a memcpy of size 'len' below. - LittleEndian::Store32(op, n); - op += count; - } - memcpy(op, literal, len); - return op + len; -} - -template -static inline char* EmitCopyAtMost64(char* op, size_t offset, size_t len) { - assert(len <= 64); - assert(len >= 4); - assert(offset < 65536); - assert(len_less_than_12 == (len < 12)); - - if (len_less_than_12 && SNAPPY_PREDICT_TRUE(offset < 2048)) { - // offset fits in 11 bits. The 3 highest go in the top of the first byte, - // and the rest go in the second byte. - *op++ = COPY_1_BYTE_OFFSET + ((len - 4) << 2) + ((offset >> 3) & 0xe0); - *op++ = offset & 0xff; - } else { - // Write 4 bytes, though we only care about 3 of them. The output buffer - // is required to have some slack, so the extra byte won't overrun it. - uint32 u = COPY_2_BYTE_OFFSET + ((len - 1) << 2) + (offset << 8); - LittleEndian::Store32(op, u); - op += 3; - } - return op; -} - -template -static inline char* EmitCopy(char* op, size_t offset, size_t len) { - assert(len_less_than_12 == (len < 12)); - if (len_less_than_12) { - return EmitCopyAtMost64(op, offset, len); - } else { - // A special case for len <= 64 might help, but so far measurements suggest - // it's in the noise. - - // Emit 64 byte copies but make sure to keep at least four bytes reserved. - while (SNAPPY_PREDICT_FALSE(len >= 68)) { - op = EmitCopyAtMost64(op, offset, 64); - len -= 64; - } - - // One or two copies will now finish the job. - if (len > 64) { - op = EmitCopyAtMost64(op, offset, 60); - len -= 60; - } - - // Emit remainder. - if (len < 12) { - op = EmitCopyAtMost64(op, offset, len); - } else { - op = EmitCopyAtMost64(op, offset, len); - } - return op; - } -} - -bool GetUncompressedLength(const char* start, size_t n, size_t* result) { - uint32 v = 0; - const char* limit = start + n; - if (Varint::Parse32WithLimit(start, limit, &v) != NULL) { - *result = v; - return true; - } else { - return false; - } -} - -namespace { -uint32 CalculateTableSize(uint32 input_size) { - assert(kMaxHashTableSize >= 256); - if (input_size > kMaxHashTableSize) { - return kMaxHashTableSize; - } - if (input_size < 256) { - return 256; - } - // This is equivalent to Log2Ceiling(input_size), assuming input_size > 1. - // 2 << Log2Floor(x - 1) is equivalent to 1 << (1 + Log2Floor(x - 1)). - return 2u << Bits::Log2Floor(input_size - 1); -} -} // namespace - -namespace internal { -WorkingMemory::WorkingMemory(size_t input_size) { - const size_t max_fragment_size = std::min(input_size, kBlockSize); - const size_t table_size = CalculateTableSize(max_fragment_size); - size_ = table_size * sizeof(*table_) + max_fragment_size + - MaxCompressedLength(max_fragment_size); - mem_ = std::allocator().allocate(size_); - table_ = reinterpret_cast(mem_); - input_ = mem_ + table_size * sizeof(*table_); - output_ = input_ + max_fragment_size; -} - -WorkingMemory::~WorkingMemory() { - std::allocator().deallocate(mem_, size_); -} - -uint16* WorkingMemory::GetHashTable(size_t fragment_size, - int* table_size) const { - const size_t htsize = CalculateTableSize(fragment_size); - memset(table_, 0, htsize * sizeof(*table_)); - *table_size = htsize; - return table_; -} -} // end namespace internal - -// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will -// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have -// empirically found that overlapping loads such as -// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2) -// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32. -// -// We have different versions for 64- and 32-bit; ideally we would avoid the -// two functions and just inline the UNALIGNED_LOAD64 call into -// GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever -// enough to avoid loading the value multiple times then. For 64-bit, the load -// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is -// done at GetUint32AtOffset() time. - -#ifdef ARCH_K8 - -typedef uint64 EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return UNALIGNED_LOAD64(ptr); -} - -static inline uint32 GetUint32AtOffset(uint64 v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset); -} - -#else - -typedef const char* EightBytesReference; - -static inline EightBytesReference GetEightBytesAt(const char* ptr) { - return ptr; -} - -static inline uint32 GetUint32AtOffset(const char* v, int offset) { - assert(offset >= 0); - assert(offset <= 4); - return UNALIGNED_LOAD32(v + offset); -} - -#endif - -// Flat array compression that does not emit the "uncompressed length" -// prefix. Compresses "input" string to the "*op" buffer. -// -// REQUIRES: "input" is at most "kBlockSize" bytes long. -// REQUIRES: "op" points to an array of memory that is at least -// "MaxCompressedLength(input.size())" in size. -// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. -// REQUIRES: "table_size" is a power of two -// -// Returns an "end" pointer into "op" buffer. -// "end - op" is the compressed size of "input". -namespace internal { -char* CompressFragment(const char* input, - size_t input_size, - char* op, - uint16* table, - const int table_size) { - // "ip" is the input pointer, and "op" is the output pointer. - const char* ip = input; - assert(input_size <= kBlockSize); - assert((table_size & (table_size - 1)) == 0); // table must be power of two - const int shift = 32 - Bits::Log2Floor(table_size); - // assert(static_cast(kuint32max >> shift) == table_size - 1); - const char* ip_end = input + input_size; - const char* base_ip = ip; - // Bytes in [next_emit, ip) will be emitted as literal bytes. Or - // [next_emit, ip_end) after the main loop. - const char* next_emit = ip; - - const size_t kInputMarginBytes = 15; - if (SNAPPY_PREDICT_TRUE(input_size >= kInputMarginBytes)) { - const char* ip_limit = input + input_size - kInputMarginBytes; - - for (uint32 next_hash = Hash(++ip, shift); ; ) { - assert(next_emit < ip); - // The body of this loop calls EmitLiteral once and then EmitCopy one or - // more times. (The exception is that when we're close to exhausting - // the input we goto emit_remainder.) - // - // In the first iteration of this loop we're just starting, so - // there's nothing to copy, so calling EmitLiteral once is - // necessary. And we only start a new iteration when the - // current iteration has determined that a call to EmitLiteral will - // precede the next call to EmitCopy (if any). - // - // Step 1: Scan forward in the input looking for a 4-byte-long match. - // If we get close to exhausting the input then goto emit_remainder. - // - // Heuristic match skipping: If 32 bytes are scanned with no matches - // found, start looking only at every other byte. If 32 more bytes are - // scanned (or skipped), look at every third byte, etc.. When a match is - // found, immediately go back to looking at every byte. This is a small - // loss (~5% performance, ~0.1% density) for compressible data due to more - // bookkeeping, but for non-compressible data (such as JPEG) it's a huge - // win since the compressor quickly "realizes" the data is incompressible - // and doesn't bother looking for matches everywhere. - // - // The "skip" variable keeps track of how many bytes there are since the - // last match; dividing it by 32 (ie. right-shifting by five) gives the - // number of bytes to move ahead for each iteration. - uint32 skip = 32; - - const char* next_ip = ip; - const char* candidate; - do { - ip = next_ip; - uint32 hash = next_hash; - assert(hash == Hash(ip, shift)); - uint32 bytes_between_hash_lookups = skip >> 5; - skip += bytes_between_hash_lookups; - next_ip = ip + bytes_between_hash_lookups; - if (SNAPPY_PREDICT_FALSE(next_ip > ip_limit)) { - goto emit_remainder; - } - next_hash = Hash(next_ip, shift); - candidate = base_ip + table[hash]; - assert(candidate >= base_ip); - assert(candidate < ip); - - table[hash] = ip - base_ip; - } while (SNAPPY_PREDICT_TRUE(UNALIGNED_LOAD32(ip) != - UNALIGNED_LOAD32(candidate))); - - // Step 2: A 4-byte match has been found. We'll later see if more - // than 4 bytes match. But, prior to the match, input - // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." - assert(next_emit + 16 <= ip_end); - op = EmitLiteral(op, next_emit, ip - next_emit); - - // Step 3: Call EmitCopy, and then see if another EmitCopy could - // be our next move. Repeat until we find no match for the - // input immediately after what was consumed by the last EmitCopy call. - // - // If we exit this loop normally then we need to call EmitLiteral next, - // though we don't yet know how big the literal will be. We handle that - // by proceeding to the next iteration of the main loop. We also can exit - // this loop via goto if we get close to exhausting the input. - EightBytesReference input_bytes; - uint32 candidate_bytes = 0; - - do { - // We have a 4-byte match at ip, and no need to emit any - // "literal bytes" prior to ip. - const char* base = ip; - std::pair p = - FindMatchLength(candidate + 4, ip + 4, ip_end); - size_t matched = 4 + p.first; - ip += matched; - size_t offset = base - candidate; - assert(0 == memcmp(base, candidate, matched)); - if (p.second) { - op = EmitCopy(op, offset, matched); - } else { - op = EmitCopy(op, offset, matched); - } - next_emit = ip; - if (SNAPPY_PREDICT_FALSE(ip >= ip_limit)) { - goto emit_remainder; - } - // We are now looking for a 4-byte match again. We read - // table[Hash(ip, shift)] for that. To improve compression, - // we also update table[Hash(ip - 1, shift)] and table[Hash(ip, shift)]. - input_bytes = GetEightBytesAt(ip - 1); - uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); - table[prev_hash] = ip - base_ip - 1; - uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); - candidate = base_ip + table[cur_hash]; - candidate_bytes = UNALIGNED_LOAD32(candidate); - table[cur_hash] = ip - base_ip; - } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); - - next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); - ++ip; - } - } - - emit_remainder: - // Emit the remaining bytes as a literal - if (next_emit < ip_end) { - op = EmitLiteral(op, next_emit, - ip_end - next_emit); - } - - return op; -} -} // end namespace internal - -// Called back at avery compression call to trace parameters and sizes. -static inline void Report(const char *algorithm, size_t compressed_size, - size_t uncompressed_size) {} - -// Signature of output types needed by decompression code. -// The decompression code is templatized on a type that obeys this -// signature so that we do not pay virtual function call overhead in -// the middle of a tight decompression loop. -// -// class DecompressionWriter { -// public: -// // Called before decompression -// void SetExpectedLength(size_t length); -// -// // Called after decompression -// bool CheckLength() const; -// -// // Called repeatedly during decompression -// bool Append(const char* ip, size_t length); -// bool AppendFromSelf(uint32 offset, size_t length); -// -// // The rules for how TryFastAppend differs from Append are somewhat -// // convoluted: -// // -// // - TryFastAppend is allowed to decline (return false) at any -// // time, for any reason -- just "return false" would be -// // a perfectly legal implementation of TryFastAppend. -// // The intention is for TryFastAppend to allow a fast path -// // in the common case of a small append. -// // - TryFastAppend is allowed to read up to bytes -// // from the input buffer, whereas Append is allowed to read -// // . However, if it returns true, it must leave -// // at least five (kMaximumTagLength) bytes in the input buffer -// // afterwards, so that there is always enough space to read the -// // next tag without checking for a refill. -// // - TryFastAppend must always return decline (return false) -// // if is 61 or more, as in this case the literal length is not -// // decoded fully. In practice, this should not be a big problem, -// // as it is unlikely that one would implement a fast path accepting -// // this much data. -// // -// bool TryFastAppend(const char* ip, size_t available, size_t length); -// }; - -static inline uint32 ExtractLowBytes(uint32 v, int n) { - assert(n >= 0); - assert(n <= 4); -#if SNAPPY_HAVE_BMI2 - return _bzhi_u32(v, 8 * n); -#else - // This needs to be wider than uint32 otherwise `mask << 32` will be - // undefined. - uint64 mask = 0xffffffff; - return v & ~(mask << (8 * n)); -#endif -} - -static inline bool LeftShiftOverflows(uint8 value, uint32 shift) { - assert(shift < 32); - static const uint8 masks[] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // - 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe}; - return (value & masks[shift]) != 0; -} - -// Helper class for decompression -class SnappyDecompressor { - private: - Source* reader_; // Underlying source of bytes to decompress - const char* ip_; // Points to next buffered byte - const char* ip_limit_; // Points just past buffered bytes - uint32 peeked_; // Bytes peeked from reader (need to skip) - bool eof_; // Hit end of input without an error? - char scratch_[kMaximumTagLength]; // See RefillTag(). - - // Ensure that all of the tag metadata for the next tag is available - // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even - // if (ip_limit_ - ip_ < 5). - // - // Returns true on success, false on error or end of input. - bool RefillTag(); - - public: - explicit SnappyDecompressor(Source* reader) - : reader_(reader), - ip_(NULL), - ip_limit_(NULL), - peeked_(0), - eof_(false) { - } - - ~SnappyDecompressor() { - // Advance past any bytes we peeked at from the reader - reader_->Skip(peeked_); - } - - // Returns true iff we have hit the end of the input without an error. - bool eof() const { - return eof_; - } - - // Read the uncompressed length stored at the start of the compressed data. - // On success, stores the length in *result and returns true. - // On failure, returns false. - bool ReadUncompressedLength(uint32* result) { - assert(ip_ == NULL); // Must not have read anything yet - // Length is encoded in 1..5 bytes - *result = 0; - uint32 shift = 0; - while (true) { - if (shift >= 32) return false; - size_t n; - const char* ip = reader_->Peek(&n); - if (n == 0) return false; - const unsigned char c = *(reinterpret_cast(ip)); - reader_->Skip(1); - uint32 val = c & 0x7f; - if (LeftShiftOverflows(static_cast(val), shift)) return false; - *result |= val << shift; - if (c < 128) { - break; - } - shift += 7; - } - return true; - } - - // Process the next item found in the input. - // Returns true if successful, false on error or end of input. - template -#if defined(__GNUC__) && defined(__x86_64__) - __attribute__((aligned(32))) -#endif - void DecompressAllTags(Writer* writer) { - // In x86, pad the function body to start 16 bytes later. This function has - // a couple of hotspots that are highly sensitive to alignment: we have - // observed regressions by more than 20% in some metrics just by moving the - // exact same code to a different position in the benchmark binary. - // - // Putting this code on a 32-byte-aligned boundary + 16 bytes makes us hit - // the "lucky" case consistently. Unfortunately, this is a very brittle - // workaround, and future differences in code generation may reintroduce - // this regression. If you experience a big, difficult to explain, benchmark - // performance regression here, first try removing this hack. -#if defined(__GNUC__) && defined(__x86_64__) - // Two 8-byte "NOP DWORD ptr [EAX + EAX*1 + 00000000H]" instructions. - asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); - asm(".byte 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00"); -#endif - - const char* ip = ip_; - // We could have put this refill fragment only at the beginning of the loop. - // However, duplicating it at the end of each branch gives the compiler more - // scope to optimize the expression based on the local - // context, which overall increases speed. - #define MAYBE_REFILL() \ - if (ip_limit_ - ip < kMaximumTagLength) { \ - ip_ = ip; \ - if (!RefillTag()) return; \ - ip = ip_; \ - } - - MAYBE_REFILL(); - for ( ;; ) { - const unsigned char c = *(reinterpret_cast(ip++)); - - // Ratio of iterations that have LITERAL vs non-LITERAL for different - // inputs. - // - // input LITERAL NON_LITERAL - // ----------------------------------- - // html|html4|cp 23% 77% - // urls 36% 64% - // jpg 47% 53% - // pdf 19% 81% - // txt[1-4] 25% 75% - // pb 24% 76% - // bin 24% 76% - if (SNAPPY_PREDICT_FALSE((c & 0x3) == LITERAL)) { - size_t literal_length = (c >> 2) + 1u; - if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) { - assert(literal_length < 61); - ip += literal_length; - // NOTE: There is no MAYBE_REFILL() here, as TryFastAppend() - // will not return true unless there's already at least five spare - // bytes in addition to the literal. - continue; - } - if (SNAPPY_PREDICT_FALSE(literal_length >= 61)) { - // Long literal. - const size_t literal_length_length = literal_length - 60; - literal_length = - ExtractLowBytes(LittleEndian::Load32(ip), literal_length_length) + - 1; - ip += literal_length_length; - } - - size_t avail = ip_limit_ - ip; - while (avail < literal_length) { - if (!writer->Append(ip, avail)) return; - literal_length -= avail; - reader_->Skip(peeked_); - size_t n; - ip = reader_->Peek(&n); - avail = n; - peeked_ = avail; - if (avail == 0) return; // Premature end of input - ip_limit_ = ip + avail; - } - if (!writer->Append(ip, literal_length)) { - return; - } - ip += literal_length; - MAYBE_REFILL(); - } else { - const size_t entry = char_table[c]; - const size_t trailer = - ExtractLowBytes(LittleEndian::Load32(ip), entry >> 11); - const size_t length = entry & 0xff; - ip += entry >> 11; - - // copy_offset/256 is encoded in bits 8..10. By just fetching - // those bits, we get copy_offset (since the bit-field starts at - // bit 8). - const size_t copy_offset = entry & 0x700; - if (!writer->AppendFromSelf(copy_offset + trailer, length)) { - return; - } - MAYBE_REFILL(); - } - } - -#undef MAYBE_REFILL - } -}; - -bool SnappyDecompressor::RefillTag() { - const char* ip = ip_; - if (ip == ip_limit_) { - // Fetch a new fragment from the reader - reader_->Skip(peeked_); // All peeked bytes are used up - size_t n; - ip = reader_->Peek(&n); - peeked_ = n; - eof_ = (n == 0); - if (eof_) return false; - ip_limit_ = ip + n; - } - - // Read the tag character - assert(ip < ip_limit_); - const unsigned char c = *(reinterpret_cast(ip)); - const uint32 entry = char_table[c]; - const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c' - assert(needed <= sizeof(scratch_)); - - // Read more bytes from reader if needed - uint32 nbuf = ip_limit_ - ip; - if (nbuf < needed) { - // Stitch together bytes from ip and reader to form the word - // contents. We store the needed bytes in "scratch_". They - // will be consumed immediately by the caller since we do not - // read more than we need. - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - while (nbuf < needed) { - size_t length; - const char* src = reader_->Peek(&length); - if (length == 0) return false; - uint32 to_add = std::min(needed - nbuf, length); - memcpy(scratch_ + nbuf, src, to_add); - nbuf += to_add; - reader_->Skip(to_add); - } - assert(nbuf == needed); - ip_ = scratch_; - ip_limit_ = scratch_ + needed; - } else if (nbuf < kMaximumTagLength) { - // Have enough bytes, but move into scratch_ so that we do not - // read past end of input - memmove(scratch_, ip, nbuf); - reader_->Skip(peeked_); // All peeked bytes are used up - peeked_ = 0; - ip_ = scratch_; - ip_limit_ = scratch_ + nbuf; - } else { - // Pass pointer to buffer returned by reader_. - ip_ = ip; - } - return true; -} - -template -static bool InternalUncompress(Source* r, Writer* writer) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(r); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; - - return InternalUncompressAllTags(&decompressor, writer, r->Available(), - uncompressed_len); -} - -template -static bool InternalUncompressAllTags(SnappyDecompressor* decompressor, - Writer* writer, - uint32 compressed_len, - uint32 uncompressed_len) { - Report("snappy_uncompress", compressed_len, uncompressed_len); - - writer->SetExpectedLength(uncompressed_len); - - // Process the entire input - decompressor->DecompressAllTags(writer); - writer->Flush(); - return (decompressor->eof() && writer->CheckLength()); -} - -bool GetUncompressedLength(Source* source, uint32* result) { - SnappyDecompressor decompressor(source); - return decompressor.ReadUncompressedLength(result); -} - -size_t Compress(Source* reader, Sink* writer) { - size_t written = 0; - size_t N = reader->Available(); - const size_t uncompressed_size = N; - char ulength[Varint::kMax32]; - char* p = Varint::Encode32(ulength, N); - writer->Append(ulength, p-ulength); - written += (p - ulength); - - internal::WorkingMemory wmem(N); - - while (N > 0) { - // Get next block to compress (without copying if possible) - size_t fragment_size; - const char* fragment = reader->Peek(&fragment_size); - assert(fragment_size != 0); // premature end of input - const size_t num_to_read = std::min(N, kBlockSize); - size_t bytes_read = fragment_size; - - size_t pending_advance = 0; - if (bytes_read >= num_to_read) { - // Buffer returned by reader is large enough - pending_advance = num_to_read; - fragment_size = num_to_read; - } else { - char* scratch = wmem.GetScratchInput(); - memcpy(scratch, fragment, bytes_read); - reader->Skip(bytes_read); - - while (bytes_read < num_to_read) { - fragment = reader->Peek(&fragment_size); - size_t n = std::min(fragment_size, num_to_read - bytes_read); - memcpy(scratch + bytes_read, fragment, n); - bytes_read += n; - reader->Skip(n); - } - assert(bytes_read == num_to_read); - fragment = scratch; - fragment_size = num_to_read; - } - assert(fragment_size == num_to_read); - - // Get encoding table for compression - int table_size; - uint16* table = wmem.GetHashTable(num_to_read, &table_size); - - // Compress input_fragment and append to dest - const int max_output = MaxCompressedLength(num_to_read); - - // Need a scratch buffer for the output, in case the byte sink doesn't - // have room for us directly. - - // Since we encode kBlockSize regions followed by a region - // which is <= kBlockSize in length, a previously allocated - // scratch_output[] region is big enough for this iteration. - char* dest = writer->GetAppendBuffer(max_output, wmem.GetScratchOutput()); - char* end = internal::CompressFragment(fragment, fragment_size, dest, table, - table_size); - writer->Append(dest, end - dest); - written += (end - dest); - - N -= num_to_read; - reader->Skip(pending_advance); - } - - Report("snappy_compress", written, uncompressed_size); - - return written; -} - -// ----------------------------------------------------------------------- -// IOVec interfaces -// ----------------------------------------------------------------------- - -// A type that writes to an iovec. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). -class SnappyIOVecWriter { - private: - // output_iov_end_ is set to iov + count and used to determine when - // the end of the iovs is reached. - const struct iovec* output_iov_end_; - -#if !defined(NDEBUG) - const struct iovec* output_iov_; -#endif // !defined(NDEBUG) - - // Current iov that is being written into. - const struct iovec* curr_iov_; - - // Pointer to current iov's write location. - char* curr_iov_output_; - - // Remaining bytes to write into curr_iov_output. - size_t curr_iov_remaining_; - - // Total bytes decompressed into output_iov_ so far. - size_t total_written_; - - // Maximum number of bytes that will be decompressed into output_iov_. - size_t output_limit_; - - static inline char* GetIOVecPointer(const struct iovec* iov, size_t offset) { - return reinterpret_cast(iov->iov_base) + offset; - } - - public: - // Does not take ownership of iov. iov must be valid during the - // entire lifetime of the SnappyIOVecWriter. - inline SnappyIOVecWriter(const struct iovec* iov, size_t iov_count) - : output_iov_end_(iov + iov_count), -#if !defined(NDEBUG) - output_iov_(iov), -#endif // !defined(NDEBUG) - curr_iov_(iov), - curr_iov_output_(iov_count ? reinterpret_cast(iov->iov_base) - : nullptr), - curr_iov_remaining_(iov_count ? iov->iov_len : 0), - total_written_(0), - output_limit_(-1) {} - - inline void SetExpectedLength(size_t len) { - output_limit_ = len; - } - - inline bool CheckLength() const { - return total_written_ == output_limit_; - } - - inline bool Append(const char* ip, size_t len) { - if (total_written_ + len > output_limit_) { - return false; - } - - return AppendNoCheck(ip, len); - } - - inline bool AppendNoCheck(const char* ip, size_t len) { - while (len > 0) { - if (curr_iov_remaining_ == 0) { - // This iovec is full. Go to the next one. - if (curr_iov_ + 1 >= output_iov_end_) { - return false; - } - ++curr_iov_; - curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); - curr_iov_remaining_ = curr_iov_->iov_len; - } - - const size_t to_write = std::min(len, curr_iov_remaining_); - memcpy(curr_iov_output_, ip, to_write); - curr_iov_output_ += to_write; - curr_iov_remaining_ -= to_write; - total_written_ += to_write; - ip += to_write; - len -= to_write; - } - - return true; - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - const size_t space_left = output_limit_ - total_written_; - if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16 && - curr_iov_remaining_ >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, curr_iov_output_); - curr_iov_output_ += len; - curr_iov_remaining_ -= len; - total_written_ += len; - return true; - } - - return false; - } - - inline bool AppendFromSelf(size_t offset, size_t len) { - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (offset - 1u >= total_written_) { - return false; - } - const size_t space_left = output_limit_ - total_written_; - if (len > space_left) { - return false; - } - - // Locate the iovec from which we need to start the copy. - const iovec* from_iov = curr_iov_; - size_t from_iov_offset = curr_iov_->iov_len - curr_iov_remaining_; - while (offset > 0) { - if (from_iov_offset >= offset) { - from_iov_offset -= offset; - break; - } - - offset -= from_iov_offset; - --from_iov; -#if !defined(NDEBUG) - assert(from_iov >= output_iov_); -#endif // !defined(NDEBUG) - from_iov_offset = from_iov->iov_len; - } - - // Copy bytes starting from the iovec pointed to by from_iov_index to - // the current iovec. - while (len > 0) { - assert(from_iov <= curr_iov_); - if (from_iov != curr_iov_) { - const size_t to_copy = - std::min((unsigned long)(from_iov->iov_len - from_iov_offset), (unsigned long)len); - AppendNoCheck(GetIOVecPointer(from_iov, from_iov_offset), to_copy); - len -= to_copy; - if (len > 0) { - ++from_iov; - from_iov_offset = 0; - } - } else { - size_t to_copy = curr_iov_remaining_; - if (to_copy == 0) { - // This iovec is full. Go to the next one. - if (curr_iov_ + 1 >= output_iov_end_) { - return false; - } - ++curr_iov_; - curr_iov_output_ = reinterpret_cast(curr_iov_->iov_base); - curr_iov_remaining_ = curr_iov_->iov_len; - continue; - } - if (to_copy > len) { - to_copy = len; - } - - IncrementalCopy(GetIOVecPointer(from_iov, from_iov_offset), - curr_iov_output_, curr_iov_output_ + to_copy, - curr_iov_output_ + curr_iov_remaining_); - curr_iov_output_ += to_copy; - curr_iov_remaining_ -= to_copy; - from_iov_offset += to_copy; - total_written_ += to_copy; - len -= to_copy; - } - } - - return true; - } - - inline void Flush() {} -}; - -bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, - const struct iovec* iov, size_t iov_cnt) { - ByteArraySource reader(compressed, compressed_length); - return RawUncompressToIOVec(&reader, iov, iov_cnt); -} - -bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, - size_t iov_cnt) { - SnappyIOVecWriter output(iov, iov_cnt); - return InternalUncompress(compressed, &output); -} - -// ----------------------------------------------------------------------- -// Flat array interfaces -// ----------------------------------------------------------------------- - -// A type that writes to a flat array. -// Note that this is not a "ByteSink", but a type that matches the -// Writer template argument to SnappyDecompressor::DecompressAllTags(). -class SnappyArrayWriter { - private: - char* base_; - char* op_; - char* op_limit_; - - public: - inline explicit SnappyArrayWriter(char* dst) - : base_(dst), - op_(dst), - op_limit_(dst) { - } - - inline void SetExpectedLength(size_t len) { - op_limit_ = op_ + len; - } - - inline bool CheckLength() const { - return op_ == op_limit_; - } - - inline bool Append(const char* ip, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (space_left < len) { - return false; - } - memcpy(op, ip, len); - op_ = op + len; - return true; - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t len) { - char* op = op_; - const size_t space_left = op_limit_ - op; - if (len <= 16 && available >= 16 + kMaximumTagLength && space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, op); - op_ = op + len; - return true; - } else { - return false; - } - } - - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ + len; - - // Check if we try to append from before the start of the buffer. - // Normally this would just be a check for "produced < offset", - // but "produced <= offset - 1u" is equivalent for every case - // except the one where offset==0, where the right side will wrap around - // to a very big number. This is convenient, as offset==0 is another - // invalid case that we also want to catch, so that we do not go - // into an infinite loop. - if (Produced() <= offset - 1u || op_end > op_limit_) return false; - op_ = IncrementalCopy(op_ - offset, op_, op_end, op_limit_); - - return true; - } - inline size_t Produced() const { - assert(op_ >= base_); - return op_ - base_; - } - inline void Flush() {} -}; - -bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { - ByteArraySource reader(compressed, n); - return RawUncompress(&reader, uncompressed); -} - -bool RawUncompress(Source* compressed, char* uncompressed) { - SnappyArrayWriter output(uncompressed); - return InternalUncompress(compressed, &output); -} - -bool Uncompress(const char* compressed, size_t n, string* uncompressed) { - size_t ulength; - if (!GetUncompressedLength(compressed, n, &ulength)) { - return false; - } - // On 32-bit builds: max_size() < kuint32max. Check for that instead - // of crashing (e.g., consider externally specified compressed data). - if (ulength > uncompressed->max_size()) { - return false; - } - STLStringResizeUninitialized(uncompressed, ulength); - return RawUncompress(compressed, n, string_as_array(uncompressed)); -} - -// A Writer that drops everything on the floor and just does validation -class SnappyDecompressionValidator { - private: - size_t expected_; - size_t produced_; - - public: - inline SnappyDecompressionValidator() : expected_(0), produced_(0) { } - inline void SetExpectedLength(size_t len) { - expected_ = len; - } - inline bool CheckLength() const { - return expected_ == produced_; - } - inline bool Append(const char* ip, size_t len) { - produced_ += len; - return produced_ <= expected_; - } - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - return false; - } - inline bool AppendFromSelf(size_t offset, size_t len) { - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (produced_ <= offset - 1u) return false; - produced_ += len; - return produced_ <= expected_; - } - inline void Flush() {} -}; - -bool IsValidCompressedBuffer(const char* compressed, size_t n) { - ByteArraySource reader(compressed, n); - SnappyDecompressionValidator writer; - return InternalUncompress(&reader, &writer); -} - -bool IsValidCompressed(Source* compressed) { - SnappyDecompressionValidator writer; - return InternalUncompress(compressed, &writer); -} - -void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length) { - ByteArraySource reader(input, input_length); - UncheckedByteArraySink writer(compressed); - Compress(&reader, &writer); - - // Compute how many bytes were added - *compressed_length = (writer.CurrentDestination() - compressed); -} - -size_t Compress(const char* input, size_t input_length, string* compressed) { - // Pre-grow the buffer to the max length of the compressed output - STLStringResizeUninitialized(compressed, MaxCompressedLength(input_length)); - - size_t compressed_length; - RawCompress(input, input_length, string_as_array(compressed), - &compressed_length); - compressed->resize(compressed_length); - return compressed_length; -} - -// ----------------------------------------------------------------------- -// Sink interface -// ----------------------------------------------------------------------- - -// A type that decompresses into a Sink. The template parameter -// Allocator must export one method "char* Allocate(int size);", which -// allocates a buffer of "size" and appends that to the destination. -template -class SnappyScatteredWriter { - Allocator allocator_; - - // We need random access into the data generated so far. Therefore - // we keep track of all of the generated data as an array of blocks. - // All of the blocks except the last have length kBlockSize. - std::vector blocks_; - size_t expected_; - - // Total size of all fully generated blocks so far - size_t full_size_; - - // Pointer into current output block - char* op_base_; // Base of output block - char* op_ptr_; // Pointer to next unfilled byte in block - char* op_limit_; // Pointer just past block - - inline size_t Size() const { - return full_size_ + (op_ptr_ - op_base_); - } - - bool SlowAppend(const char* ip, size_t len); - bool SlowAppendFromSelf(size_t offset, size_t len); - - public: - inline explicit SnappyScatteredWriter(const Allocator& allocator) - : allocator_(allocator), - full_size_(0), - op_base_(NULL), - op_ptr_(NULL), - op_limit_(NULL) { - } - - inline void SetExpectedLength(size_t len) { - assert(blocks_.empty()); - expected_ = len; - } - - inline bool CheckLength() const { - return Size() == expected_; - } - - // Return the number of bytes actually uncompressed so far - inline size_t Produced() const { - return Size(); - } - - inline bool Append(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; - if (len <= avail) { - // Fast path - memcpy(op_ptr_, ip, len); - op_ptr_ += len; - return true; - } else { - return SlowAppend(ip, len); - } - } - - inline bool TryFastAppend(const char* ip, size_t available, size_t length) { - char* op = op_ptr_; - const int space_left = op_limit_ - op; - if (length <= 16 && available >= 16 + kMaximumTagLength && - space_left >= 16) { - // Fast path, used for the majority (about 95%) of invocations. - UnalignedCopy128(ip, op); - op_ptr_ = op + length; - return true; - } else { - return false; - } - } - - inline bool AppendFromSelf(size_t offset, size_t len) { - char* const op_end = op_ptr_ + len; - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - if (SNAPPY_PREDICT_TRUE(offset - 1u < (size_t)(op_ptr_ - op_base_) && - op_end <= op_limit_)) { - // Fast path: src and dst in current block. - op_ptr_ = IncrementalCopy(op_ptr_ - offset, op_ptr_, op_end, op_limit_); - return true; - } - return SlowAppendFromSelf(offset, len); - } - - // Called at the end of the decompress. We ask the allocator - // write all blocks to the sink. - inline void Flush() { allocator_.Flush(Produced()); } -}; - -template -bool SnappyScatteredWriter::SlowAppend(const char* ip, size_t len) { - size_t avail = op_limit_ - op_ptr_; - while (len > avail) { - // Completely fill this block - memcpy(op_ptr_, ip, avail); - op_ptr_ += avail; - assert(op_limit_ - op_ptr_ == 0); - full_size_ += (op_ptr_ - op_base_); - len -= avail; - ip += avail; - - // Bounds check - if (full_size_ + len > expected_) { - return false; - } - - // Make new block - size_t bsize = std::min(kBlockSize, expected_ - full_size_); - op_base_ = allocator_.Allocate(bsize); - op_ptr_ = op_base_; - op_limit_ = op_base_ + bsize; - blocks_.push_back(op_base_); - avail = bsize; - } - - memcpy(op_ptr_, ip, len); - op_ptr_ += len; - return true; -} - -template -bool SnappyScatteredWriter::SlowAppendFromSelf(size_t offset, - size_t len) { - // Overflow check - // See SnappyArrayWriter::AppendFromSelf for an explanation of - // the "offset - 1u" trick. - const size_t cur = Size(); - if (offset - 1u >= cur) return false; - if (expected_ - cur < len) return false; - - // Currently we shouldn't ever hit this path because Compress() chops the - // input into blocks and does not create cross-block copies. However, it is - // nice if we do not rely on that, since we can get better compression if we - // allow cross-block copies and thus might want to change the compressor in - // the future. - size_t src = cur - offset; - while (len-- > 0) { - char c = blocks_[src >> kBlockLog][src & (kBlockSize-1)]; - Append(&c, 1); - src++; - } - return true; -} - -class SnappySinkAllocator { - public: - explicit SnappySinkAllocator(Sink* dest): dest_(dest) {} - ~SnappySinkAllocator() {} - - char* Allocate(int size) { - Datablock block(new char[size], size); - blocks_.push_back(block); - return block.data; - } - - // We flush only at the end, because the writer wants - // random access to the blocks and once we hand the - // block over to the sink, we can't access it anymore. - // Also we don't write more than has been actually written - // to the blocks. - void Flush(size_t size) { - size_t size_written = 0; - size_t block_size; - for (size_t i = 0; i < blocks_.size(); ++i) { - block_size = std::min(blocks_[i].size, size - size_written); - dest_->AppendAndTakeOwnership(blocks_[i].data, block_size, - &SnappySinkAllocator::Deleter, NULL); - size_written += block_size; - } - blocks_.clear(); - } - - private: - struct Datablock { - char* data; - size_t size; - Datablock(char* p, size_t s) : data(p), size(s) {} - }; - - static void Deleter(void* arg, const char* bytes, size_t size) { - delete[] bytes; - } - - Sink* dest_; - std::vector blocks_; - - // Note: copying this object is allowed -}; - -size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed) { - SnappySinkAllocator allocator(uncompressed); - SnappyScatteredWriter writer(allocator); - InternalUncompress(compressed, &writer); - return writer.Produced(); -} - -bool Uncompress(Source* compressed, Sink* uncompressed) { - // Read the uncompressed length from the front of the compressed input - SnappyDecompressor decompressor(compressed); - uint32 uncompressed_len = 0; - if (!decompressor.ReadUncompressedLength(&uncompressed_len)) { - return false; - } - - char c; - size_t allocated_size; - char* buf = uncompressed->GetAppendBufferVariable( - 1, uncompressed_len, &c, 1, &allocated_size); - - const size_t compressed_len = compressed->Available(); - // If we can get a flat buffer, then use it, otherwise do block by block - // uncompression - if (allocated_size >= uncompressed_len) { - SnappyArrayWriter writer(buf); - bool result = InternalUncompressAllTags(&decompressor, &writer, - compressed_len, uncompressed_len); - uncompressed->Append(buf, writer.Produced()); - return result; - } else { - SnappySinkAllocator allocator(uncompressed); - SnappyScatteredWriter writer(allocator); - return InternalUncompressAllTags(&decompressor, &writer, compressed_len, - uncompressed_len); - } -} - -} // namespace duckdb_snappy diff --git a/src/duckdb/third_party/snappy/snappy.h b/src/duckdb/third_party/snappy/snappy.h deleted file mode 100644 index 0172537ff..000000000 --- a/src/duckdb/third_party/snappy/snappy.h +++ /dev/null @@ -1,189 +0,0 @@ -// Copyright 2005 and onwards Google Inc. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// A light-weight compression algorithm. It is designed for speed of -// compression and decompression, rather than for the utmost in space -// savings. -// -// For getting better compression ratios when you are compressing data -// with long repeated sequences or compressing data that is similar to -// other data, while still compressing fast, you might look at first -// using BMDiff and then compressing the output of BMDiff with -// Snappy. - -#ifndef THIRD_PARTY_SNAPPY_SNAPPY_H__ -#define THIRD_PARTY_SNAPPY_SNAPPY_H__ - -#include -#include - -#include "snappy-stubs-public.h" - -namespace duckdb_snappy { - class Source; - class Sink; - - // ------------------------------------------------------------------------ - // Generic compression/decompression routines. - // ------------------------------------------------------------------------ - - // Compress the bytes read from "*source" and append to "*sink". Return the - // number of bytes written. - size_t Compress(Source* source, Sink* sink); - - // Find the uncompressed length of the given stream, as given by the header. - // Note that the true length could deviate from this; the stream could e.g. - // be truncated. - // - // Also note that this leaves "*source" in a state that is unsuitable for - // further operations, such as RawUncompress(). You will need to rewind - // or recreate the source yourself before attempting any further calls. - bool GetUncompressedLength(Source* source, uint32* result); - - // ------------------------------------------------------------------------ - // Higher-level string based routines (should be sufficient for most users) - // ------------------------------------------------------------------------ - - // Sets "*output" to the compressed version of "input[0,input_length-1]". - // Original contents of *output are lost. - // - // REQUIRES: "input[]" is not an alias of "*output". - size_t Compress(const char* input, size_t input_length, string* output); - - // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". - // Original contents of "*uncompressed" are lost. - // - // REQUIRES: "compressed[]" is not an alias of "*uncompressed". - // - // returns false if the message is corrupted and could not be decompressed - bool Uncompress(const char* compressed, size_t compressed_length, - string* uncompressed); - - // Decompresses "compressed" to "*uncompressed". - // - // returns false if the message is corrupted and could not be decompressed - bool Uncompress(Source* compressed, Sink* uncompressed); - - // This routine uncompresses as much of the "compressed" as possible - // into sink. It returns the number of valid bytes added to sink - // (extra invalid bytes may have been added due to errors; the caller - // should ignore those). The emitted data typically has length - // GetUncompressedLength(), but may be shorter if an error is - // encountered. - size_t UncompressAsMuchAsPossible(Source* compressed, Sink* uncompressed); - - // ------------------------------------------------------------------------ - // Lower-level character array based routines. May be useful for - // efficiency reasons in certain circumstances. - // ------------------------------------------------------------------------ - - // REQUIRES: "compressed" must point to an area of memory that is at - // least "MaxCompressedLength(input_length)" bytes in length. - // - // Takes the data stored in "input[0..input_length]" and stores - // it in the array pointed to by "compressed". - // - // "*compressed_length" is set to the length of the compressed output. - // - // Example: - // char* output = new char[snappy::MaxCompressedLength(input_length)]; - // size_t output_length; - // RawCompress(input, input_length, output, &output_length); - // ... Process(output, output_length) ... - // delete [] output; - void RawCompress(const char* input, - size_t input_length, - char* compressed, - size_t* compressed_length); - - // Given data in "compressed[0..compressed_length-1]" generated by - // calling the Snappy::Compress routine, this routine - // stores the uncompressed data to - // uncompressed[0..GetUncompressedLength(compressed)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(const char* compressed, size_t compressed_length, - char* uncompressed); - - // Given data from the byte source 'compressed' generated by calling - // the Snappy::Compress routine, this routine stores the uncompressed - // data to - // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1] - // returns false if the message is corrupted and could not be decrypted - bool RawUncompress(Source* compressed, char* uncompressed); - - // Given data in "compressed[0..compressed_length-1]" generated by - // calling the Snappy::Compress routine, this routine - // stores the uncompressed data to the iovec "iov". The number of physical - // buffers in "iov" is given by iov_cnt and their cumulative size - // must be at least GetUncompressedLength(compressed). The individual buffers - // in "iov" must not overlap with each other. - // - // returns false if the message is corrupted and could not be decrypted - bool RawUncompressToIOVec(const char* compressed, size_t compressed_length, - const struct iovec* iov, size_t iov_cnt); - - // Given data from the byte source 'compressed' generated by calling - // the Snappy::Compress routine, this routine stores the uncompressed - // data to the iovec "iov". The number of physical - // buffers in "iov" is given by iov_cnt and their cumulative size - // must be at least GetUncompressedLength(compressed). The individual buffers - // in "iov" must not overlap with each other. - // - // returns false if the message is corrupted and could not be decrypted - bool RawUncompressToIOVec(Source* compressed, const struct iovec* iov, - size_t iov_cnt); - - // Returns the maximal size of the compressed representation of - // input data that is "source_bytes" bytes in length; - size_t MaxCompressedLength(size_t source_bytes); - - // REQUIRES: "compressed[]" was produced by RawCompress() or Compress() - // Returns true and stores the length of the uncompressed data in - // *result normally. Returns false on parsing error. - // This operation takes O(1) time. - bool GetUncompressedLength(const char* compressed, size_t compressed_length, - size_t* result); - - // Returns true iff the contents of "compressed[]" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to compressed_length, but is usually at least - // a factor of four faster than actual decompression. - bool IsValidCompressedBuffer(const char* compressed, - size_t compressed_length); - - // Returns true iff the contents of "compressed" can be uncompressed - // successfully. Does not return the uncompressed data. Takes - // time proportional to *compressed length, but is usually at least - // a factor of four faster than actual decompression. - // On success, consumes all of *compressed. On failure, consumes an - // unspecified prefix of *compressed. - bool IsValidCompressed(Source* compressed); - -} // end namespace duckdb_snappy - -#endif // THIRD_PARTY_SNAPPY_SNAPPY_H__ diff --git a/src/duckdb/third_party/thrift/thrift/TApplicationException.h b/src/duckdb/third_party/thrift/thrift/TApplicationException.h deleted file mode 100644 index f64d51043..000000000 --- a/src/duckdb/third_party/thrift/thrift/TApplicationException.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ -#define _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ 1 - -#include "thrift/Thrift.h" - -namespace duckdb_apache { -namespace thrift { - -namespace protocol { -class TProtocol; -} - -class TApplicationException : public TException { -public: - /** - * Error codes for the various types of exceptions. - */ - enum TApplicationExceptionType { - UNKNOWN = 0, - UNKNOWN_METHOD = 1, - INVALID_MESSAGE_TYPE = 2, - WRONG_METHOD_NAME = 3, - BAD_SEQUENCE_ID = 4, - MISSING_RESULT = 5, - INTERNAL_ERROR = 6, - PROTOCOL_ERROR = 7, - INVALID_TRANSFORM = 8, - INVALID_PROTOCOL = 9, - UNSUPPORTED_CLIENT_TYPE = 10 - }; - - TApplicationException() : TException(), type_(UNKNOWN) {} - - TApplicationException(TApplicationExceptionType type) : TException(), type_(type) {} - - TApplicationException(const std::string& message) : TException(message), type_(UNKNOWN) {} - - TApplicationException(TApplicationExceptionType type, const std::string& message) - : TException(message), type_(type) {} - - ~TApplicationException() noexcept override = default; - - /** - * Returns an error code that provides information about the type of error - * that has occurred. - * - * @return Error code - */ - TApplicationExceptionType getType() const { return type_; } - - const char* what() const noexcept override { - if (message_.empty()) { - switch (type_) { - case UNKNOWN: - return "TApplicationException: Unknown application exception"; - case UNKNOWN_METHOD: - return "TApplicationException: Unknown method"; - case INVALID_MESSAGE_TYPE: - return "TApplicationException: Invalid message type"; - case WRONG_METHOD_NAME: - return "TApplicationException: Wrong method name"; - case BAD_SEQUENCE_ID: - return "TApplicationException: Bad sequence identifier"; - case MISSING_RESULT: - return "TApplicationException: Missing result"; - case INTERNAL_ERROR: - return "TApplicationException: Internal error"; - case PROTOCOL_ERROR: - return "TApplicationException: Protocol error"; - case INVALID_TRANSFORM: - return "TApplicationException: Invalid transform"; - case INVALID_PROTOCOL: - return "TApplicationException: Invalid protocol"; - case UNSUPPORTED_CLIENT_TYPE: - return "TApplicationException: Unsupported client type"; - default: - return "TApplicationException: (Invalid exception type)"; - }; - } else { - return message_.c_str(); - } - } - -protected: - /** - * Error code - */ - TApplicationExceptionType type_; -}; -} -} // duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_TAPPLICATIONEXCEPTION_H_ diff --git a/src/duckdb/third_party/thrift/thrift/TBase.h b/src/duckdb/third_party/thrift/thrift/TBase.h deleted file mode 100644 index 5b8cf493d..000000000 --- a/src/duckdb/third_party/thrift/thrift/TBase.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TBASE_H_ -#define _DUCKDB_THRIFT_TBASE_H_ 1 - -#include "thrift/Thrift.h" -#include "thrift/protocol/TProtocol.h" - -namespace duckdb_apache { -namespace thrift { - -class TBase { -public: - virtual ~TBase() = default; - virtual uint32_t read(protocol::TProtocol* iprot) = 0; - virtual uint32_t write(protocol::TProtocol* oprot) const = 0; -}; -} -} // duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_TBASE_H_ diff --git a/src/duckdb/third_party/thrift/thrift/TLogging.h b/src/duckdb/third_party/thrift/thrift/TLogging.h deleted file mode 100644 index 1f888e963..000000000 --- a/src/duckdb/third_party/thrift/thrift/TLogging.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TLOGGING_H_ -#define _DUCKDB_THRIFT_TLOGGING_H_ 1 - -#include "thrift/thrift-config.h" - -/** - * Contains utility macros for debugging and logging. - * - */ - -#include - -#ifdef HAVE_STDINT_H -#include -#endif - -/** - * T_GLOBAL_DEBUGGING_LEVEL = 0: all debugging turned off, debug macros undefined - * T_GLOBAL_DEBUGGING_LEVEL = 1: all debugging turned on - */ -#define T_GLOBAL_DEBUGGING_LEVEL 0 - -/** - * T_GLOBAL_LOGGING_LEVEL = 0: all logging turned off, logging macros undefined - * T_GLOBAL_LOGGING_LEVEL = 1: all logging turned on - */ -#define T_GLOBAL_LOGGING_LEVEL 0 - -/** - * Standard wrapper around fprintf what will prefix the file name and line - * number to the line. Uses T_GLOBAL_DEBUGGING_LEVEL to control whether it is - * turned on or off. - * - * @param format_string - */ -#if T_GLOBAL_DEBUGGING_LEVEL > 0 -#define T_DEBUG(format_string, ...) \ - if (T_GLOBAL_DEBUGGING_LEVEL > 0) { \ - fprintf(stderr, "[%s,%d] " format_string " \n", __FILE__, __LINE__, ##__VA_ARGS__); \ - } -#else -#define T_DEBUG(format_string, ...) -#endif - -/** - * analogous to T_DEBUG but also prints the time - * - * @param string format_string input: printf style format string - */ -#if T_GLOBAL_DEBUGGING_LEVEL > 0 -#define T_DEBUG_T(format_string, ...) \ - { \ - if (T_GLOBAL_DEBUGGING_LEVEL > 0) { \ - time_t now; \ - char dbgtime[26]; \ - time(&now); \ - THRIFT_CTIME_R(&now, dbgtime); \ - dbgtime[24] = '\0'; \ - fprintf(stderr, \ - "[%s,%d] [%s] " format_string " \n", \ - __FILE__, \ - __LINE__, \ - dbgtime, \ - ##__VA_ARGS__); \ - } \ - } -#else -#define T_DEBUG_T(format_string, ...) -#endif - - - - - -/** - * Log input message - * - * @param string format_string input: printf style format string - */ -#if T_GLOBAL_LOGGING_LEVEL > 0 -#define T_LOG_OPER(format_string, ...) \ - { \ - if (T_GLOBAL_LOGGING_LEVEL > 0) { \ - time_t now; \ - char dbgtime[26]; \ - time(&now); \ - THRIFT_CTIME_R(&now, dbgtime); \ - dbgtime[24] = '\0'; \ - fprintf(stderr, "[%s] " format_string " \n", dbgtime, ##__VA_ARGS__); \ - } \ - } -#else -#define T_LOG_OPER(format_string, ...) -#endif - -/** - * T_GLOBAL_DEBUG_VIRTUAL = 0 or unset: normal operation, - * virtual call debug messages disabled - * T_GLOBAL_DEBUG_VIRTUAL = 1: log a debug messages whenever an - * avoidable virtual call is made - * T_GLOBAL_DEBUG_VIRTUAL = 2: record detailed info that can be - * printed by calling - * duckdb_apache::thrift::profile_print_info() - */ -#if T_GLOBAL_DEBUG_VIRTUAL > 1 -#define T_VIRTUAL_CALL() ::duckdb_apache::thrift::profile_virtual_call(typeid(*this)) -#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot) \ - do { \ - if (!(specific_prot)) { \ - ::duckdb_apache::thrift::profile_generic_protocol(typeid(*template_class), typeid(*generic_prot)); \ - } \ - } while (0) -#elif T_GLOBAL_DEBUG_VIRTUAL == 1 -#define T_VIRTUAL_CALL() fprintf(stderr, "[%s,%d] virtual call\n", __FILE__, __LINE__) -#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot) \ - do { \ - if (!(specific_prot)) { \ - fprintf(stderr, "[%s,%d] failed to cast to specific protocol type\n", __FILE__, __LINE__); \ - } \ - } while (0) -#else -#define T_VIRTUAL_CALL() -#define T_GENERIC_PROTOCOL(template_class, generic_prot, specific_prot) -#endif - -#endif // #ifndef _DUCKDB_THRIFT_TLOGGING_H_ diff --git a/src/duckdb/third_party/thrift/thrift/TToString.h b/src/duckdb/third_party/thrift/thrift/TToString.h deleted file mode 100644 index 38227012a..000000000 --- a/src/duckdb/third_party/thrift/thrift/TToString.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TOSTRING_H_ -#define _DUCKDB_THRIFT_TOSTRING_H_ 1 - -#include -#include -#include -#include -#include -#include -#include "duckdb/common/vector.hpp" - -namespace duckdb_apache { -namespace thrift { - -template -std::string to_string(const T& t) { - std::ostringstream o; - o << t; - return o.str(); -} - -// TODO: replace the computations below with std::numeric_limits::max_digits10 once C++11 -// is enabled. -inline std::string to_string(const float& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); -} - -inline std::string to_string(const double& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); -} - -inline std::string to_string(const long double& t) { - std::ostringstream o; - o.precision(static_cast(std::ceil(static_cast(std::numeric_limits::digits * std::log10(2.0f) + 1)))); - o << t; - return o.str(); -} - -template -std::string to_string(const std::map& m); - -template -std::string to_string(const std::set& s); - -template -std::string to_string(const duckdb::vector& t); - -template -std::string to_string(const typename std::pair& v) { - std::ostringstream o; - o << to_string(v.first) << ": " << to_string(v.second); - return o.str(); -} - -template -std::string to_string(const T& beg, const T& end) { - std::ostringstream o; - for (T it = beg; it != end; ++it) { - if (it != beg) - o << ", "; - o << to_string(*it); - } - return o.str(); -} - -template -std::string to_string(const duckdb::vector& t) { - std::ostringstream o; - o << "[" << to_string(t.begin(), t.end()) << "]"; - return o.str(); -} - -template -std::string to_string(const std::map& m) { - std::ostringstream o; - o << "{" << to_string(m.begin(), m.end()) << "}"; - return o.str(); -} - -template -std::string to_string(const std::set& s) { - std::ostringstream o; - o << "{" << to_string(s.begin(), s.end()) << "}"; - return o.str(); -} -} -} // duckdb_apache::thrift - -#endif // _DUCKDB_THRIFT_TOSTRING_H_ diff --git a/src/duckdb/third_party/thrift/thrift/Thrift.h b/src/duckdb/third_party/thrift/thrift/Thrift.h deleted file mode 100644 index e9315f785..000000000 --- a/src/duckdb/third_party/thrift/thrift/Thrift.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_THRIFT_H_ -#define _DUCKDB_THRIFT_THRIFT_H_ 1 - -#include "thrift/transport/PlatformSocket.h" - -#include "thrift/thrift-config.h" - -#include -#include - -#include -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_INTTYPES_H -#include -#endif -#include -#include -#include -#include -#include -#include -#include -#include - -#include "thrift/TLogging.h" -//#include - -#define THRIFT_UNUSED_VARIABLE(x) ((void)(x)) - -namespace duckdb_apache { -namespace thrift { - -class TEnumIterator { -public: - using value_type = std::pair; - using difference_type = std::ptrdiff_t; - using pointer = value_type*; - using reference = value_type&; - using iterator_category = std::forward_iterator_tag; - - TEnumIterator(int n, int* enums, const char** names) - : ii_(0), n_(n), enums_(enums), names_(names) {} - - int operator++() { return ++ii_; } - - bool operator!=(const TEnumIterator& end) { - THRIFT_UNUSED_VARIABLE(end); - assert(end.n_ == -1); - return (ii_ != n_); - } - - std::pair operator*() const { return std::make_pair(enums_[ii_], names_[ii_]); } - -private: - int ii_; - const int n_; - int* enums_; - const char** names_; -}; - -class TException : public std::exception { -public: - TException() : message_() {} - - TException(const std::string& message) : message_(message) {} - - ~TException() noexcept override = default; - - const char* what() const noexcept override { - if (message_.empty()) { - return "Default TException."; - } else { - return message_.c_str(); - } - } - -protected: - std::string message_; -}; - -class TDelayedException { -public: - template - static TDelayedException* delayException(const E& e); - virtual void throw_it() = 0; - virtual ~TDelayedException() = default; -}; - -template -class TExceptionWrapper : public TDelayedException { -public: - TExceptionWrapper(const E& e) : e_(e) {} - void throw_it() override { - E temp(e_); - delete this; - throw temp; - } - -private: - E e_; -}; - -template -TDelayedException* TDelayedException::delayException(const E& e) { - return new TExceptionWrapper(e); -} - -#if T_GLOBAL_DEBUG_VIRTUAL > 1 -void profile_virtual_call(const std::type_info& info); -void profile_generic_protocol(const std::type_info& template_type, const std::type_info& prot_type); -void profile_print_info(FILE* f); -void profile_print_info(); -void profile_write_pprof(FILE* gen_calls_f, FILE* virtual_calls_f); -#endif -} -} // duckdb_apache::thrift - -#endif // #ifndef _DUCKDB_THRIFT_THRIFT_H_ diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TCompactProtocol.h b/src/duckdb/third_party/thrift/thrift/protocol/TCompactProtocol.h deleted file mode 100644 index e250076a8..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TCompactProtocol.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_H_ 1 - -#include "thrift/protocol/TVirtualProtocol.h" - -#include -#include - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -/** - * C++ Implementation of the Compact Protocol as described in THRIFT-110 - */ -template -class TCompactProtocolT : public TVirtualProtocol > { -public: - static const int8_t PROTOCOL_ID = (int8_t)0x82u; - static const int8_t VERSION_N = 1; - static const int8_t VERSION_MASK = 0x1f; // 0001 1111 - -protected: - static const int8_t TYPE_MASK = (int8_t)0xE0u; // 1110 0000 - static const int8_t TYPE_BITS = 0x07; // 0000 0111 - static const int32_t TYPE_SHIFT_AMOUNT = 5; - - Transport_* trans_; - - /** - * (Writing) If we encounter a boolean field begin, save the TField here - * so it can have the value incorporated. - */ - struct { - const char* name; - TType fieldType; - int16_t fieldId; - } booleanField_; - - /** - * (Reading) If we read a field header, and it's a boolean field, save - * the boolean value here so that readBool can use it. - */ - struct { - bool hasBoolValue; - bool boolValue; - } boolValue_; - - /** - * Used to keep track of the last field for the current and previous structs, - * so we can do the delta stuff. - */ - - std::stack lastField_; - int16_t lastFieldId_; - -public: - TCompactProtocolT(std::shared_ptr trans) - : TVirtualProtocol >(trans), - trans_(trans.get()), - lastFieldId_(0), - string_limit_(0), - string_buf_(nullptr), - string_buf_size_(0), - container_limit_(0) { - booleanField_.name = nullptr; - boolValue_.hasBoolValue = false; - } - - TCompactProtocolT(std::shared_ptr trans, - int32_t string_limit, - int32_t container_limit) - : TVirtualProtocol >(trans), - trans_(trans.get()), - lastFieldId_(0), - string_limit_(string_limit), - string_buf_(nullptr), - string_buf_size_(0), - container_limit_(container_limit) { - booleanField_.name = nullptr; - boolValue_.hasBoolValue = false; - } - - ~TCompactProtocolT() override { free(string_buf_); } - - /** - * Writing functions - */ - - virtual uint32_t writeMessageBegin(const std::string& name, - const TMessageType messageType, - const int32_t seqid); - - uint32_t writeStructBegin(const char* name); - - uint32_t writeStructEnd(); - - uint32_t writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId); - - uint32_t writeFieldStop(); - - uint32_t writeListBegin(const TType elemType, const uint32_t size); - - uint32_t writeSetBegin(const TType elemType, const uint32_t size); - - virtual uint32_t writeMapBegin(const TType keyType, const TType valType, const uint32_t size); - - uint32_t writeBool(const bool value); - - uint32_t writeByte(const int8_t byte); - - uint32_t writeI16(const int16_t i16); - - uint32_t writeI32(const int32_t i32); - - uint32_t writeI64(const int64_t i64); - - uint32_t writeDouble(const double dub); - - uint32_t writeString(const std::string& str); - - uint32_t writeBinary(const std::string& str); - - /** - * These methods are called by structs, but don't actually have any wired - * output or purpose - */ - virtual uint32_t writeMessageEnd() { return 0; } - uint32_t writeMapEnd() { return 0; } - uint32_t writeListEnd() { return 0; } - uint32_t writeSetEnd() { return 0; } - uint32_t writeFieldEnd() { return 0; } - -protected: - int32_t writeFieldBeginInternal(const char* name, - const TType fieldType, - const int16_t fieldId, - int8_t typeOverride); - uint32_t writeCollectionBegin(const TType elemType, int32_t size); - uint32_t writeVarint32(uint32_t n); - uint32_t writeVarint64(uint64_t n); - uint64_t i64ToZigzag(const int64_t l); - uint32_t i32ToZigzag(const int32_t n); - inline int8_t getCompactType(const TType ttype); - -public: - uint32_t readMessageBegin(std::string& name, TMessageType& messageType, int32_t& seqid); - - uint32_t readStructBegin(std::string& name); - - uint32_t readStructEnd(); - - uint32_t readFieldBegin(std::string& name, TType& fieldType, int16_t& fieldId); - - uint32_t readMapBegin(TType& keyType, TType& valType, uint32_t& size); - - uint32_t readListBegin(TType& elemType, uint32_t& size); - - uint32_t readSetBegin(TType& elemType, uint32_t& size); - - uint32_t readBool(bool& value); - // Provide the default readBool() implementation for std::vector - using TVirtualProtocol >::readBool; - - uint32_t readByte(int8_t& byte); - - uint32_t readI16(int16_t& i16); - - uint32_t readI32(int32_t& i32); - - uint32_t readI64(int64_t& i64); - - uint32_t readDouble(double& dub); - - uint32_t readString(std::string& str); - - uint32_t readBinary(std::string& str); - - /* - *These methods are here for the struct to call, but don't have any wire - * encoding. - */ - uint32_t readMessageEnd() { return 0; } - uint32_t readFieldEnd() { return 0; } - uint32_t readMapEnd() { return 0; } - uint32_t readListEnd() { return 0; } - uint32_t readSetEnd() { return 0; } - -protected: - uint32_t readVarint32(int32_t& i32); - uint32_t readVarint64(int64_t& i64); - int32_t zigzagToI32(uint32_t n); - int64_t zigzagToI64(uint64_t n); - TType getTType(int8_t type); - - // Buffer for reading strings, save for the lifetime of the protocol to - // avoid memory churn allocating memory on every string read - int32_t string_limit_; - uint8_t* string_buf_; - int32_t string_buf_size_; - int32_t container_limit_; -}; - -typedef TCompactProtocolT TCompactProtocol; - -/** - * Constructs compact protocol handlers - */ -template -class TCompactProtocolFactoryT : public TProtocolFactory { -public: - TCompactProtocolFactoryT() : string_limit_(0), container_limit_(0) {} - - TCompactProtocolFactoryT(int32_t string_limit, int32_t container_limit) - : string_limit_(string_limit), container_limit_(container_limit) {} - - ~TCompactProtocolFactoryT() override = default; - - void setStringSizeLimit(int32_t string_limit) { string_limit_ = string_limit; } - - void setContainerSizeLimit(int32_t container_limit) { container_limit_ = container_limit; } - - std::shared_ptr getProtocol(std::shared_ptr trans) override { - std::shared_ptr specific_trans = std::static_pointer_cast(trans); - TProtocol* prot; - if (specific_trans) { - prot = new TCompactProtocolT(specific_trans, string_limit_, container_limit_); - } else { - prot = new TCompactProtocol(trans, string_limit_, container_limit_); - } - - return std::shared_ptr(prot); - } - -private: - int32_t string_limit_; - int32_t container_limit_; -}; - -typedef TCompactProtocolFactoryT TCompactProtocolFactory; -} -} -} // duckdb_apache::thrift::protocol - -#include "thrift/protocol/TCompactProtocol.tcc" - -#endif diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TCompactProtocol.tcc b/src/duckdb/third_party/thrift/thrift/protocol/TCompactProtocol.tcc deleted file mode 100644 index e22e7214a..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TCompactProtocol.tcc +++ /dev/null @@ -1,826 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -#ifndef _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ -#define _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ 1 - -#include - -/* - * TCompactProtocol::i*ToZigzag depend on the fact that the right shift - * operator on a signed integer is an arithmetic (sign-extending) shift. - * If this is not the case, the current implementation will not work. - * If anyone encounters this error, we can try to figure out the best - * way to implement an arithmetic right shift on their platform. - */ -#if !defined(SIGNED_RIGHT_SHIFT_IS) || !defined(ARITHMETIC_RIGHT_SHIFT) -# error "Unable to determine the behavior of a signed right shift" -#endif -#if SIGNED_RIGHT_SHIFT_IS != ARITHMETIC_RIGHT_SHIFT -# error "TCompactProtocol currently only works if a signed right shift is arithmetic" -#endif - -#ifndef UNLIKELY -#ifdef __GNUC__ -#define UNLIKELY(val) (__builtin_expect((val), 0)) -#else -#define UNLIKELY(val) (val) -#endif -#endif - -namespace duckdb_apache { namespace thrift { namespace protocol { - -namespace detail { namespace compact { - -enum Types { - CT_STOP = 0x00, - CT_BOOLEAN_TRUE = 0x01, - CT_BOOLEAN_FALSE = 0x02, - CT_BYTE = 0x03, - CT_I16 = 0x04, - CT_I32 = 0x05, - CT_I64 = 0x06, - CT_DOUBLE = 0x07, - CT_BINARY = 0x08, - CT_LIST = 0x09, - CT_SET = 0x0A, - CT_MAP = 0x0B, - CT_STRUCT = 0x0C -}; - -const int8_t TTypeToCType[16] = { - CT_STOP, // T_STOP - 0, // unused - CT_BOOLEAN_TRUE, // T_BOOL - CT_BYTE, // T_BYTE - CT_DOUBLE, // T_DOUBLE - 0, // unused - CT_I16, // T_I16 - 0, // unused - CT_I32, // T_I32 - 0, // unused - CT_I64, // T_I64 - CT_BINARY, // T_STRING - CT_STRUCT, // T_STRUCT - CT_MAP, // T_MAP - CT_SET, // T_SET - CT_LIST, // T_LIST -}; - -}} // end detail::compact namespace - - -template -uint32_t TCompactProtocolT::writeMessageBegin( - const std::string& name, - const TMessageType messageType, - const int32_t seqid) { - uint32_t wsize = 0; - wsize += writeByte(PROTOCOL_ID); - wsize += writeByte((VERSION_N & VERSION_MASK) | (((int32_t)messageType << TYPE_SHIFT_AMOUNT) & TYPE_MASK)); - wsize += writeVarint32(seqid); - wsize += writeString(name); - return wsize; -} - -/** - * Write a field header containing the field id and field type. If the - * difference between the current field id and the last one is small (< 15), - * then the field id will be encoded in the 4 MSB as a delta. Otherwise, the - * field id will follow the type header as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeFieldBegin(const char* name, - const TType fieldType, - const int16_t fieldId) { - if (fieldType == T_BOOL) { - booleanField_.name = name; - booleanField_.fieldType = fieldType; - booleanField_.fieldId = fieldId; - } else { - return writeFieldBeginInternal(name, fieldType, fieldId, -1); - } - return 0; -} - -/** - * Write the STOP symbol so we know there are no more fields in this struct. - */ -template -uint32_t TCompactProtocolT::writeFieldStop() { - return writeByte(T_STOP); -} - -/** - * Write a struct begin. This doesn't actually put anything on the wire. We - * use it as an opportunity to put special placeholder markers on the field - * stack so we can get the field id deltas correct. - */ -template -uint32_t TCompactProtocolT::writeStructBegin(const char* name) { - (void) name; - lastField_.push(lastFieldId_); - lastFieldId_ = 0; - return 0; -} - -/** - * Write a struct end. This doesn't actually put anything on the wire. We use - * this as an opportunity to pop the last field from the current struct off - * of the field stack. - */ -template -uint32_t TCompactProtocolT::writeStructEnd() { - lastFieldId_ = lastField_.top(); - lastField_.pop(); - return 0; -} - -/** - * Write a List header. - */ -template -uint32_t TCompactProtocolT::writeListBegin(const TType elemType, - const uint32_t size) { - return writeCollectionBegin(elemType, size); -} - -/** - * Write a set header. - */ -template -uint32_t TCompactProtocolT::writeSetBegin(const TType elemType, - const uint32_t size) { - return writeCollectionBegin(elemType, size); -} - -/** - * Write a map header. If the map is empty, omit the key and value type - * headers, as we don't need any additional information to skip it. - */ -template -uint32_t TCompactProtocolT::writeMapBegin(const TType keyType, - const TType valType, - const uint32_t size) { - uint32_t wsize = 0; - - if (size == 0) { - wsize += writeByte(0); - } else { - wsize += writeVarint32(size); - wsize += writeByte(getCompactType(keyType) << 4 | getCompactType(valType)); - } - return wsize; -} - -/** - * Write a boolean value. Potentially, this could be a boolean field, in - * which case the field header info isn't written yet. If so, decide what the - * right type header is for the value and then write the field header. - * Otherwise, write a single byte. - */ -template -uint32_t TCompactProtocolT::writeBool(const bool value) { - uint32_t wsize = 0; - - if (booleanField_.name != NULL) { - // we haven't written the field header yet - wsize - += writeFieldBeginInternal(booleanField_.name, - booleanField_.fieldType, - booleanField_.fieldId, - static_cast(value - ? detail::compact::CT_BOOLEAN_TRUE - : detail::compact::CT_BOOLEAN_FALSE)); - booleanField_.name = NULL; - } else { - // we're not part of a field, so just write the value - wsize - += writeByte(static_cast(value - ? detail::compact::CT_BOOLEAN_TRUE - : detail::compact::CT_BOOLEAN_FALSE)); - } - return wsize; -} - -template -uint32_t TCompactProtocolT::writeByte(const int8_t byte) { - trans_->write((uint8_t*)&byte, 1); - return 1; -} - -/** - * Write an i16 as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeI16(const int16_t i16) { - return writeVarint32(i32ToZigzag(i16)); -} - -/** - * Write an i32 as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeI32(const int32_t i32) { - return writeVarint32(i32ToZigzag(i32)); -} - -/** - * Write an i64 as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::writeI64(const int64_t i64) { - return writeVarint64(i64ToZigzag(i64)); -} - -/** - * Write a double to the wire as 8 bytes. - */ -template -uint32_t TCompactProtocolT::writeDouble(const double dub) { - //BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t)); - //BOOST_STATIC_ASSERT(std::numeric_limits::is_iec559); - - uint64_t bits = bitwise_cast(dub); - bits = THRIFT_htolell(bits); - trans_->write((uint8_t*)&bits, 8); - return 8; -} - -/** - * Write a string to the wire with a varint size preceding. - */ -template -uint32_t TCompactProtocolT::writeString(const std::string& str) { - return writeBinary(str); -} - -template -uint32_t TCompactProtocolT::writeBinary(const std::string& str) { - if(str.size() > (std::numeric_limits::max)()) - throw TProtocolException(TProtocolException::SIZE_LIMIT); - uint32_t ssize = static_cast(str.size()); - uint32_t wsize = writeVarint32(ssize) ; - // checking ssize + wsize > uint_max, but we don't want to overflow while checking for overflows. - // transforming the check to ssize > uint_max - wsize - if(ssize > (std::numeric_limits::max)() - wsize) - throw TProtocolException(TProtocolException::SIZE_LIMIT); - wsize += ssize; - trans_->write((uint8_t*)str.data(), ssize); - return wsize; -} - -// -// Internal Writing methods -// - -/** - * The workhorse of writeFieldBegin. It has the option of doing a - * 'type override' of the type header. This is used specifically in the - * boolean field case. - */ -template -int32_t TCompactProtocolT::writeFieldBeginInternal( - const char* name, - const TType fieldType, - const int16_t fieldId, - int8_t typeOverride) { - (void) name; - uint32_t wsize = 0; - - // if there's a type override, use that. - int8_t typeToWrite = (typeOverride == -1 ? getCompactType(fieldType) : typeOverride); - - // check if we can use delta encoding for the field id - if (fieldId > lastFieldId_ && fieldId - lastFieldId_ <= 15) { - // write them together - wsize += writeByte(static_cast((fieldId - lastFieldId_) - << 4 | typeToWrite)); - } else { - // write them separate - wsize += writeByte(typeToWrite); - wsize += writeI16(fieldId); - } - - lastFieldId_ = fieldId; - return wsize; -} - -/** - * Abstract method for writing the start of lists and sets. List and sets on - * the wire differ only by the type indicator. - */ -template -uint32_t TCompactProtocolT::writeCollectionBegin(const TType elemType, - int32_t size) { - uint32_t wsize = 0; - if (size <= 14) { - wsize += writeByte(static_cast(size - << 4 | getCompactType(elemType))); - } else { - wsize += writeByte(0xf0 | getCompactType(elemType)); - wsize += writeVarint32(size); - } - return wsize; -} - -/** - * Write an i32 as a varint. Results in 1-5 bytes on the wire. - */ -template -uint32_t TCompactProtocolT::writeVarint32(uint32_t n) { - uint8_t buf[5]; - uint32_t wsize = 0; - - while (true) { - if ((n & ~0x7F) == 0) { - buf[wsize++] = (int8_t)n; - break; - } else { - buf[wsize++] = (int8_t)((n & 0x7F) | 0x80); - n >>= 7; - } - } - trans_->write(buf, wsize); - return wsize; -} - -/** - * Write an i64 as a varint. Results in 1-10 bytes on the wire. - */ -template -uint32_t TCompactProtocolT::writeVarint64(uint64_t n) { - uint8_t buf[10]; - uint32_t wsize = 0; - - while (true) { - if ((n & ~0x7FL) == 0) { - buf[wsize++] = (int8_t)n; - break; - } else { - buf[wsize++] = (int8_t)((n & 0x7F) | 0x80); - n >>= 7; - } - } - trans_->write(buf, wsize); - return wsize; -} - -/** - * Convert l into a zigzag long. This allows negative numbers to be - * represented compactly as a varint. - */ -template -uint64_t TCompactProtocolT::i64ToZigzag(const int64_t l) { - return (static_cast(l) << 1) ^ (l >> 63); -} - -/** - * Convert n into a zigzag int. This allows negative numbers to be - * represented compactly as a varint. - */ -template -uint32_t TCompactProtocolT::i32ToZigzag(const int32_t n) { - return (static_cast(n) << 1) ^ (n >> 31); -} - -/** - * Given a TType value, find the appropriate detail::compact::Types value - */ -template -int8_t TCompactProtocolT::getCompactType(const TType ttype) { - return detail::compact::TTypeToCType[ttype]; -} - -// -// Reading Methods -// - -/** - * Read a message header. - */ -template -uint32_t TCompactProtocolT::readMessageBegin( - std::string& name, - TMessageType& messageType, - int32_t& seqid) { - uint32_t rsize = 0; - int8_t protocolId; - int8_t versionAndType; - int8_t version; - - rsize += readByte(protocolId); - if (protocolId != PROTOCOL_ID) { - throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol identifier"); - } - - rsize += readByte(versionAndType); - version = (int8_t)(versionAndType & VERSION_MASK); - if (version != VERSION_N) { - throw TProtocolException(TProtocolException::BAD_VERSION, "Bad protocol version"); - } - - messageType = (TMessageType)((versionAndType >> TYPE_SHIFT_AMOUNT) & TYPE_BITS); - rsize += readVarint32(seqid); - rsize += readString(name); - - return rsize; -} - -/** - * Read a struct begin. There's nothing on the wire for this, but it is our - * opportunity to push a new struct begin marker on the field stack. - */ -template -uint32_t TCompactProtocolT::readStructBegin(std::string& name) { - name = ""; - lastField_.push(lastFieldId_); - lastFieldId_ = 0; - return 0; -} - -/** - * Doesn't actually consume any wire data, just removes the last field for - * this struct from the field stack. - */ -template -uint32_t TCompactProtocolT::readStructEnd() { - lastFieldId_ = lastField_.top(); - lastField_.pop(); - return 0; -} - -/** - * Read a field header off the wire. - */ -template -uint32_t TCompactProtocolT::readFieldBegin(std::string& name, - TType& fieldType, - int16_t& fieldId) { - (void) name; - uint32_t rsize = 0; - int8_t byte; - int8_t type; - - rsize += readByte(byte); - type = (byte & 0x0f); - - // if it's a stop, then we can return immediately, as the struct is over. - if (type == T_STOP) { - fieldType = T_STOP; - fieldId = 0; - return rsize; - } - - // mask off the 4 MSB of the type header. it could contain a field id delta. - int16_t modifier = (int16_t)(((uint8_t)byte & 0xf0) >> 4); - if (modifier == 0) { - // not a delta, look ahead for the zigzag varint field id. - rsize += readI16(fieldId); - } else { - fieldId = (int16_t)(lastFieldId_ + modifier); - } - fieldType = getTType(type); - - // if this happens to be a boolean field, the value is encoded in the type - if (type == detail::compact::CT_BOOLEAN_TRUE || - type == detail::compact::CT_BOOLEAN_FALSE) { - // save the boolean value in a special instance variable. - boolValue_.hasBoolValue = true; - boolValue_.boolValue = - (type == detail::compact::CT_BOOLEAN_TRUE ? true : false); - } - - // push the new field onto the field stack so we can keep the deltas going. - lastFieldId_ = fieldId; - return rsize; -} - -/** - * Read a map header off the wire. If the size is zero, skip reading the key - * and value type. This means that 0-length maps will yield TMaps without the - * "correct" types. - */ -template -uint32_t TCompactProtocolT::readMapBegin(TType& keyType, - TType& valType, - uint32_t& size) { - uint32_t rsize = 0; - int8_t kvType = 0; - int32_t msize = 0; - - rsize += readVarint32(msize); - if (msize != 0) - rsize += readByte(kvType); - - if (msize < 0) { - throw TProtocolException(TProtocolException::NEGATIVE_SIZE); - } else if (container_limit_ && msize > container_limit_) { - throw TProtocolException(TProtocolException::SIZE_LIMIT); - } - - keyType = getTType((int8_t)((uint8_t)kvType >> 4)); - valType = getTType((int8_t)((uint8_t)kvType & 0xf)); - size = (uint32_t)msize; - - return rsize; -} - -/** - * Read a list header off the wire. If the list size is 0-14, the size will - * be packed into the element type header. If it's a longer list, the 4 MSB - * of the element type header will be 0xF, and a varint will follow with the - * true size. - */ -template -uint32_t TCompactProtocolT::readListBegin(TType& elemType, - uint32_t& size) { - int8_t size_and_type; - uint32_t rsize = 0; - int32_t lsize; - - rsize += readByte(size_and_type); - - lsize = ((uint8_t)size_and_type >> 4) & 0x0f; - if (lsize == 15) { - rsize += readVarint32(lsize); - } - - if (lsize < 0) { - throw TProtocolException(TProtocolException::NEGATIVE_SIZE); - } else if (container_limit_ && lsize > container_limit_) { - throw TProtocolException(TProtocolException::SIZE_LIMIT); - } - - elemType = getTType((int8_t)(size_and_type & 0x0f)); - size = (uint32_t)lsize; - - return rsize; -} - -/** - * Read a set header off the wire. If the set size is 0-14, the size will - * be packed into the element type header. If it's a longer set, the 4 MSB - * of the element type header will be 0xF, and a varint will follow with the - * true size. - */ -template -uint32_t TCompactProtocolT::readSetBegin(TType& elemType, - uint32_t& size) { - return readListBegin(elemType, size); -} - -/** - * Read a boolean off the wire. If this is a boolean field, the value should - * already have been read during readFieldBegin, so we'll just consume the - * pre-stored value. Otherwise, read a byte. - */ -template -uint32_t TCompactProtocolT::readBool(bool& value) { - if (boolValue_.hasBoolValue == true) { - value = boolValue_.boolValue; - boolValue_.hasBoolValue = false; - return 0; - } else { - int8_t val; - readByte(val); - value = (val == detail::compact::CT_BOOLEAN_TRUE); - return 1; - } -} - -/** - * Read a single byte off the wire. Nothing interesting here. - */ -template -uint32_t TCompactProtocolT::readByte(int8_t& byte) { - uint8_t b[1]; - trans_->readAll(b, 1); - byte = *(int8_t*)b; - return 1; -} - -/** - * Read an i16 from the wire as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::readI16(int16_t& i16) { - int32_t value; - uint32_t rsize = readVarint32(value); - i16 = (int16_t)zigzagToI32(value); - return rsize; -} - -/** - * Read an i32 from the wire as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::readI32(int32_t& i32) { - int32_t value; - uint32_t rsize = readVarint32(value); - i32 = zigzagToI32(value); - return rsize; -} - -/** - * Read an i64 from the wire as a zigzag varint. - */ -template -uint32_t TCompactProtocolT::readI64(int64_t& i64) { - int64_t value; - uint32_t rsize = readVarint64(value); - i64 = zigzagToI64(value); - return rsize; -} - -/** - * No magic here - just read a double off the wire. - */ -template -uint32_t TCompactProtocolT::readDouble(double& dub) { - //BOOST_STATIC_ASSERT(sizeof(double) == sizeof(uint64_t)); - //BOOST_STATIC_ASSERT(std::numeric_limits::is_iec559); - - union { - uint64_t bits; - uint8_t b[8]; - } u; - trans_->readAll(u.b, 8); - u.bits = THRIFT_letohll(u.bits); - dub = bitwise_cast(u.bits); - return 8; -} - -template -uint32_t TCompactProtocolT::readString(std::string& str) { - return readBinary(str); -} - -/** - * Read a byte[] from the wire. - */ -template -uint32_t TCompactProtocolT::readBinary(std::string& str) { - int32_t rsize = 0; - int32_t size; - - rsize += readVarint32(size); - // Catch empty string case - if (size == 0) { - str = ""; - return rsize; - } - - // Catch error cases - if (size < 0) { - throw TProtocolException(TProtocolException::NEGATIVE_SIZE); - } - if (string_limit_ > 0 && size > string_limit_) { - throw TProtocolException(TProtocolException::SIZE_LIMIT); - } - - // Use the heap here to prevent stack overflow for v. large strings - if (size > string_buf_size_ || string_buf_ == NULL) { - void* new_string_buf = std::realloc(string_buf_, (uint32_t)size); - if (new_string_buf == NULL) { - throw std::bad_alloc(); - } - string_buf_ = (uint8_t*)new_string_buf; - string_buf_size_ = size; - } - trans_->readAll(string_buf_, size); - str.assign((char*)string_buf_, size); - - return rsize + (uint32_t)size; -} - -/** - * Read an i32 from the wire as a varint. The MSB of each byte is set - * if there is another byte to follow. This can read up to 5 bytes. - */ -template -uint32_t TCompactProtocolT::readVarint32(int32_t& i32) { - int64_t val; - uint32_t rsize = readVarint64(val); - i32 = (int32_t)val; - return rsize; -} - -/** - * Read an i64 from the wire as a proper varint. The MSB of each byte is set - * if there is another byte to follow. This can read up to 10 bytes. - */ -template -uint32_t TCompactProtocolT::readVarint64(int64_t& i64) { - uint32_t rsize = 0; - uint64_t val = 0; - int shift = 0; - uint8_t buf[10]; // 64 bits / (7 bits/byte) = 10 bytes. - uint32_t buf_size = sizeof(buf); - const uint8_t* borrowed = trans_->borrow(buf, &buf_size); - - // Fast path. - if (borrowed != NULL) { - while (true) { - uint8_t byte = borrowed[rsize]; - rsize++; - val |= (uint64_t)(byte & 0x7f) << shift; - shift += 7; - if (!(byte & 0x80)) { - i64 = val; - trans_->consume(rsize); - return rsize; - } - // Have to check for invalid data so we don't crash. - if (UNLIKELY(rsize == sizeof(buf))) { - throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes."); - } - } - } - - // Slow path. - else { - while (true) { - uint8_t byte; - rsize += trans_->readAll(&byte, 1); - val |= (uint64_t)(byte & 0x7f) << shift; - shift += 7; - if (!(byte & 0x80)) { - i64 = val; - return rsize; - } - // Might as well check for invalid data on the slow path too. - if (UNLIKELY(rsize >= sizeof(buf))) { - throw TProtocolException(TProtocolException::INVALID_DATA, "Variable-length int over 10 bytes."); - } - } - } -} - -/** - * Convert from zigzag int to int. - */ -template -int32_t TCompactProtocolT::zigzagToI32(uint32_t n) { - return (n >> 1) ^ static_cast(-static_cast(n & 1)); -} - -/** - * Convert from zigzag long to long. - */ -template -int64_t TCompactProtocolT::zigzagToI64(uint64_t n) { - return (n >> 1) ^ static_cast(-static_cast(n & 1)); -} - -template -TType TCompactProtocolT::getTType(int8_t type) { - switch (type) { - case T_STOP: - return T_STOP; - case detail::compact::CT_BOOLEAN_FALSE: - case detail::compact::CT_BOOLEAN_TRUE: - return T_BOOL; - case detail::compact::CT_BYTE: - return T_BYTE; - case detail::compact::CT_I16: - return T_I16; - case detail::compact::CT_I32: - return T_I32; - case detail::compact::CT_I64: - return T_I64; - case detail::compact::CT_DOUBLE: - return T_DOUBLE; - case detail::compact::CT_BINARY: - return T_STRING; - case detail::compact::CT_LIST: - return T_LIST; - case detail::compact::CT_SET: - return T_SET; - case detail::compact::CT_MAP: - return T_MAP; - case detail::compact::CT_STRUCT: - return T_STRUCT; - default: - throw TException(std::string("don't know what type: ") + (char)type); - } -} - -}}} // duckdb_apache::thrift::protocol - -#endif // _DUCKDB_THRIFT_PROTOCOL_TCOMPACTPROTOCOL_TCC_ diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp b/src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp deleted file mode 100644 index d70f5634c..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "thrift/protocol/TProtocol.h" - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -TProtocol::~TProtocol() = default; -uint32_t TProtocol::skip_virt(TType type) { - return ::duckdb_apache::thrift::protocol::skip(*this, type); -} - -TProtocolFactory::~TProtocolFactory() = default; - -}}} // duckdb_apache::thrift::protocol diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TProtocol.h b/src/duckdb/third_party/thrift/thrift/protocol/TProtocol.h deleted file mode 100644 index 517aceb75..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TProtocol.h +++ /dev/null @@ -1,766 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1 - -#ifdef _WIN32 -// Need to come before any Windows.h includes -#include -#endif - -#include "thrift/transport/TTransport.h" -#include "thrift/protocol/TProtocolException.h" - -#include - -#ifdef HAVE_NETINET_IN_H -#include -#endif -#include -#include -#include -#include "duckdb/common/vector.hpp" -#include - -// Use this to get around strict aliasing rules. -// For example, uint64_t i = bitwise_cast(returns_double()); -// The most obvious implementation is to just cast a pointer, -// but that doesn't work. -// For a pretty in-depth explanation of the problem, see -// http://cellperformance.beyond3d.com/articles/2006/06/understanding-strict-aliasing.html -namespace duckdb_apache { namespace thrift { -template -static inline To bitwise_cast(From from) { - static_assert(sizeof(From) == sizeof(To), "sizeof(From) == sizeof(To)"); - - // BAD!!! These are all broken with -O2. - // return *reinterpret_cast(&from); // BAD!!! - // return *static_cast(static_cast(&from)); // BAD!!! - // return *(To*)(void*)&from; // BAD!!! - - // Super clean and paritally blessed by section 3.9 of the standard. - // unsigned char c[sizeof(from)]; - // memcpy(c, &from, sizeof(from)); - // To to; - // memcpy(&to, c, sizeof(c)); - // return to; - - // Slightly more questionable. - // Same code emitted by GCC. - // To to; - // memcpy(&to, &from, sizeof(from)); - // return to; - - // Technically undefined, but almost universally supported, - // and the most efficient implementation. - union { - From f; - To t; - } u; - u.f = from; - return u.t; -} -}} // namespace duckdb_apache::thrift - - -#ifdef HAVE_SYS_PARAM_H -#include -#endif - -#ifndef __THRIFT_BYTE_ORDER -# if defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && defined(BIG_ENDIAN) -# define __THRIFT_BYTE_ORDER BYTE_ORDER -# define __THRIFT_LITTLE_ENDIAN LITTLE_ENDIAN -# define __THRIFT_BIG_ENDIAN BIG_ENDIAN -# else -//# include -# if BOOST_ENDIAN_BIG_BYTE -# define __THRIFT_BYTE_ORDER 4321 -# define __THRIFT_LITTLE_ENDIAN 0 -# define __THRIFT_BIG_ENDIAN __THRIFT_BYTE_ORDER -# elif BOOST_ENDIAN_LITTLE_BYTE -# define __THRIFT_BYTE_ORDER 1234 -# define __THRIFT_LITTLE_ENDIAN __THRIFT_BYTE_ORDER -# define __THRIFT_BIG_ENDIAN 0 -# endif -# ifdef BOOST_LITTLE_ENDIAN -# else -# endif -# endif -#endif - -#if __THRIFT_BYTE_ORDER == __THRIFT_BIG_ENDIAN -# if !defined(THRIFT_ntohll) -# define THRIFT_ntohll(n) (n) -# define THRIFT_htonll(n) (n) -# endif -# if defined(__GNUC__) && defined(__GLIBC__) -# include -# define THRIFT_htolell(n) bswap_64(n) -# define THRIFT_letohll(n) bswap_64(n) -# define THRIFT_htolel(n) bswap_32(n) -# define THRIFT_letohl(n) bswap_32(n) -# define THRIFT_htoles(n) bswap_16(n) -# define THRIFT_letohs(n) bswap_16(n) -# else /* GNUC & GLIBC */ -# define bswap_64(n) \ - ( (((n) & 0xff00000000000000ull) >> 56) \ - | (((n) & 0x00ff000000000000ull) >> 40) \ - | (((n) & 0x0000ff0000000000ull) >> 24) \ - | (((n) & 0x000000ff00000000ull) >> 8) \ - | (((n) & 0x00000000ff000000ull) << 8) \ - | (((n) & 0x0000000000ff0000ull) << 24) \ - | (((n) & 0x000000000000ff00ull) << 40) \ - | (((n) & 0x00000000000000ffull) << 56) ) -# define bswap_32(n) \ - ( (((n) & 0xff000000ul) >> 24) \ - | (((n) & 0x00ff0000ul) >> 8) \ - | (((n) & 0x0000ff00ul) << 8) \ - | (((n) & 0x000000fful) << 24) ) -# define bswap_16(n) \ - ( (((n) & ((unsigned short)0xff00ul)) >> 8) \ - | (((n) & ((unsigned short)0x00fful)) << 8) ) -# define THRIFT_htolell(n) bswap_64(n) -# define THRIFT_letohll(n) bswap_64(n) -# define THRIFT_htolel(n) bswap_32(n) -# define THRIFT_letohl(n) bswap_32(n) -# define THRIFT_htoles(n) bswap_16(n) -# define THRIFT_letohs(n) bswap_16(n) -# endif /* GNUC & GLIBC */ -#elif __THRIFT_BYTE_ORDER == __THRIFT_LITTLE_ENDIAN -# define THRIFT_htolell(n) (n) -# define THRIFT_letohll(n) (n) -# define THRIFT_htolel(n) (n) -# define THRIFT_letohl(n) (n) -# define THRIFT_htoles(n) (n) -# define THRIFT_letohs(n) (n) -# if defined(__GNUC__) && defined(__GLIBC__) -# include -# define THRIFT_ntohll(n) bswap_64(n) -# define THRIFT_htonll(n) bswap_64(n) -# elif defined(_MSC_VER) /* Microsoft Visual C++ */ -# define THRIFT_ntohll(n) ( _byteswap_uint64((uint64_t)n) ) -# define THRIFT_htonll(n) ( _byteswap_uint64((uint64_t)n) ) -# elif !defined(THRIFT_ntohll) /* Not GNUC/GLIBC or MSVC */ -# define THRIFT_ntohll(n) ( (((uint64_t)ntohl((uint32_t)n)) << 32) + ntohl((uint32_t)(n >> 32)) ) -# define THRIFT_htonll(n) ( (((uint64_t)htonl((uint32_t)n)) << 32) + htonl((uint32_t)(n >> 32)) ) -# endif /* GNUC/GLIBC or MSVC or something else */ -#else /* __THRIFT_BYTE_ORDER */ -# error "Can't define THRIFT_htonll or THRIFT_ntohll!" -#endif - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -using duckdb_apache::thrift::transport::TTransport; - -/** - * Enumerated definition of the types that the Thrift protocol supports. - * Take special note of the T_END type which is used specifically to mark - * the end of a sequence of fields. - */ -enum TType { - T_STOP = 0, - T_VOID = 1, - T_BOOL = 2, - T_BYTE = 3, - T_I08 = 3, - T_I16 = 6, - T_I32 = 8, - T_U64 = 9, - T_I64 = 10, - T_DOUBLE = 4, - T_STRING = 11, - T_UTF7 = 11, - T_STRUCT = 12, - T_MAP = 13, - T_SET = 14, - T_LIST = 15, - T_UTF8 = 16, - T_UTF16 = 17 -}; - -/** - * Enumerated definition of the message types that the Thrift protocol - * supports. - */ -enum TMessageType { - T_CALL = 1, - T_REPLY = 2, - T_EXCEPTION = 3, - T_ONEWAY = 4 -}; - -static const uint32_t DEFAULT_RECURSION_LIMIT = 64; - -/** - * Abstract class for a thrift protocol driver. These are all the methods that - * a protocol must implement. Essentially, there must be some way of reading - * and writing all the base types, plus a mechanism for writing out structs - * with indexed fields. - * - * TProtocol objects should not be shared across multiple encoding contexts, - * as they may need to maintain internal state in some protocols (i.e. XML). - * Note that is is acceptable for the TProtocol module to do its own internal - * buffered reads/writes to the underlying TTransport where appropriate (i.e. - * when parsing an input XML stream, reading should be batched rather than - * looking ahead character by character for a close tag). - * - */ -class TProtocol { -public: - virtual ~TProtocol(); - - /** - * Writing functions. - */ - - virtual uint32_t writeMessageBegin_virt(const std::string& name, - const TMessageType messageType, - const int32_t seqid) = 0; - - virtual uint32_t writeMessageEnd_virt() = 0; - - virtual uint32_t writeStructBegin_virt(const char* name) = 0; - - virtual uint32_t writeStructEnd_virt() = 0; - - virtual uint32_t writeFieldBegin_virt(const char* name, - const TType fieldType, - const int16_t fieldId) = 0; - - virtual uint32_t writeFieldEnd_virt() = 0; - - virtual uint32_t writeFieldStop_virt() = 0; - - virtual uint32_t writeMapBegin_virt(const TType keyType, const TType valType, const uint32_t size) - = 0; - - virtual uint32_t writeMapEnd_virt() = 0; - - virtual uint32_t writeListBegin_virt(const TType elemType, const uint32_t size) = 0; - - virtual uint32_t writeListEnd_virt() = 0; - - virtual uint32_t writeSetBegin_virt(const TType elemType, const uint32_t size) = 0; - - virtual uint32_t writeSetEnd_virt() = 0; - - virtual uint32_t writeBool_virt(const bool value) = 0; - - virtual uint32_t writeByte_virt(const int8_t byte) = 0; - - virtual uint32_t writeI16_virt(const int16_t i16) = 0; - - virtual uint32_t writeI32_virt(const int32_t i32) = 0; - - virtual uint32_t writeI64_virt(const int64_t i64) = 0; - - virtual uint32_t writeDouble_virt(const double dub) = 0; - - virtual uint32_t writeString_virt(const std::string& str) = 0; - - virtual uint32_t writeBinary_virt(const std::string& str) = 0; - - uint32_t writeMessageBegin(const std::string& name, - const TMessageType messageType, - const int32_t seqid) { - T_VIRTUAL_CALL(); - return writeMessageBegin_virt(name, messageType, seqid); - } - - uint32_t writeMessageEnd() { - T_VIRTUAL_CALL(); - return writeMessageEnd_virt(); - } - - uint32_t writeStructBegin(const char* name) { - T_VIRTUAL_CALL(); - return writeStructBegin_virt(name); - } - - uint32_t writeStructEnd() { - T_VIRTUAL_CALL(); - return writeStructEnd_virt(); - } - - uint32_t writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId) { - T_VIRTUAL_CALL(); - return writeFieldBegin_virt(name, fieldType, fieldId); - } - - uint32_t writeFieldEnd() { - T_VIRTUAL_CALL(); - return writeFieldEnd_virt(); - } - - uint32_t writeFieldStop() { - T_VIRTUAL_CALL(); - return writeFieldStop_virt(); - } - - uint32_t writeMapBegin(const TType keyType, const TType valType, const uint32_t size) { - T_VIRTUAL_CALL(); - return writeMapBegin_virt(keyType, valType, size); - } - - uint32_t writeMapEnd() { - T_VIRTUAL_CALL(); - return writeMapEnd_virt(); - } - - uint32_t writeListBegin(const TType elemType, const uint32_t size) { - T_VIRTUAL_CALL(); - return writeListBegin_virt(elemType, size); - } - - uint32_t writeListEnd() { - T_VIRTUAL_CALL(); - return writeListEnd_virt(); - } - - uint32_t writeSetBegin(const TType elemType, const uint32_t size) { - T_VIRTUAL_CALL(); - return writeSetBegin_virt(elemType, size); - } - - uint32_t writeSetEnd() { - T_VIRTUAL_CALL(); - return writeSetEnd_virt(); - } - - uint32_t writeBool(const bool value) { - T_VIRTUAL_CALL(); - return writeBool_virt(value); - } - - uint32_t writeByte(const int8_t byte) { - T_VIRTUAL_CALL(); - return writeByte_virt(byte); - } - - uint32_t writeI16(const int16_t i16) { - T_VIRTUAL_CALL(); - return writeI16_virt(i16); - } - - uint32_t writeI32(const int32_t i32) { - T_VIRTUAL_CALL(); - return writeI32_virt(i32); - } - - uint32_t writeI64(const int64_t i64) { - T_VIRTUAL_CALL(); - return writeI64_virt(i64); - } - - uint32_t writeDouble(const double dub) { - T_VIRTUAL_CALL(); - return writeDouble_virt(dub); - } - - uint32_t writeString(const std::string& str) { - T_VIRTUAL_CALL(); - return writeString_virt(str); - } - - uint32_t writeBinary(const std::string& str) { - T_VIRTUAL_CALL(); - return writeBinary_virt(str); - } - - /** - * Reading functions - */ - - virtual uint32_t readMessageBegin_virt(std::string& name, - TMessageType& messageType, - int32_t& seqid) = 0; - - virtual uint32_t readMessageEnd_virt() = 0; - - virtual uint32_t readStructBegin_virt(std::string& name) = 0; - - virtual uint32_t readStructEnd_virt() = 0; - - virtual uint32_t readFieldBegin_virt(std::string& name, TType& fieldType, int16_t& fieldId) = 0; - - virtual uint32_t readFieldEnd_virt() = 0; - - virtual uint32_t readMapBegin_virt(TType& keyType, TType& valType, uint32_t& size) = 0; - - virtual uint32_t readMapEnd_virt() = 0; - - virtual uint32_t readListBegin_virt(TType& elemType, uint32_t& size) = 0; - - virtual uint32_t readListEnd_virt() = 0; - - virtual uint32_t readSetBegin_virt(TType& elemType, uint32_t& size) = 0; - - virtual uint32_t readSetEnd_virt() = 0; - - virtual uint32_t readBool_virt(bool& value) = 0; - - virtual uint32_t readBool_virt(std::vector::reference value) = 0; - - virtual uint32_t readByte_virt(int8_t& byte) = 0; - - virtual uint32_t readI16_virt(int16_t& i16) = 0; - - virtual uint32_t readI32_virt(int32_t& i32) = 0; - - virtual uint32_t readI64_virt(int64_t& i64) = 0; - - virtual uint32_t readDouble_virt(double& dub) = 0; - - virtual uint32_t readString_virt(std::string& str) = 0; - - virtual uint32_t readBinary_virt(std::string& str) = 0; - - uint32_t readMessageBegin(std::string& name, TMessageType& messageType, int32_t& seqid) { - T_VIRTUAL_CALL(); - return readMessageBegin_virt(name, messageType, seqid); - } - - uint32_t readMessageEnd() { - T_VIRTUAL_CALL(); - return readMessageEnd_virt(); - } - - uint32_t readStructBegin(std::string& name) { - T_VIRTUAL_CALL(); - return readStructBegin_virt(name); - } - - uint32_t readStructEnd() { - T_VIRTUAL_CALL(); - return readStructEnd_virt(); - } - - uint32_t readFieldBegin(std::string& name, TType& fieldType, int16_t& fieldId) { - T_VIRTUAL_CALL(); - return readFieldBegin_virt(name, fieldType, fieldId); - } - - uint32_t readFieldEnd() { - T_VIRTUAL_CALL(); - return readFieldEnd_virt(); - } - - uint32_t readMapBegin(TType& keyType, TType& valType, uint32_t& size) { - T_VIRTUAL_CALL(); - return readMapBegin_virt(keyType, valType, size); - } - - uint32_t readMapEnd() { - T_VIRTUAL_CALL(); - return readMapEnd_virt(); - } - - uint32_t readListBegin(TType& elemType, uint32_t& size) { - T_VIRTUAL_CALL(); - return readListBegin_virt(elemType, size); - } - - uint32_t readListEnd() { - T_VIRTUAL_CALL(); - return readListEnd_virt(); - } - - uint32_t readSetBegin(TType& elemType, uint32_t& size) { - T_VIRTUAL_CALL(); - return readSetBegin_virt(elemType, size); - } - - uint32_t readSetEnd() { - T_VIRTUAL_CALL(); - return readSetEnd_virt(); - } - - uint32_t readBool(bool& value) { - T_VIRTUAL_CALL(); - return readBool_virt(value); - } - - uint32_t readByte(int8_t& byte) { - T_VIRTUAL_CALL(); - return readByte_virt(byte); - } - - uint32_t readI16(int16_t& i16) { - T_VIRTUAL_CALL(); - return readI16_virt(i16); - } - - uint32_t readI32(int32_t& i32) { - T_VIRTUAL_CALL(); - return readI32_virt(i32); - } - - uint32_t readI64(int64_t& i64) { - T_VIRTUAL_CALL(); - return readI64_virt(i64); - } - - uint32_t readDouble(double& dub) { - T_VIRTUAL_CALL(); - return readDouble_virt(dub); - } - - uint32_t readString(std::string& str) { - T_VIRTUAL_CALL(); - return readString_virt(str); - } - - uint32_t readBinary(std::string& str) { - T_VIRTUAL_CALL(); - return readBinary_virt(str); - } - - /* - * std::vector is specialized for bool, and its elements are individual bits - * rather than bools. We need to define a different version of readBool() - * to work with std::vector. - */ - uint32_t readBool(std::vector::reference value) { - T_VIRTUAL_CALL(); - return readBool_virt(value); - } - - /** - * Method to arbitrarily skip over data. - */ - uint32_t skip(TType type) { - T_VIRTUAL_CALL(); - return skip_virt(type); - } - virtual uint32_t skip_virt(TType type); - - inline std::shared_ptr getTransport() { return ptrans_; } - - // TODO: remove these two calls, they are for backwards - // compatibility - inline std::shared_ptr getInputTransport() { return ptrans_; } - inline std::shared_ptr getOutputTransport() { return ptrans_; } - - // input and output recursion depth are kept separate so that one protocol - // can be used concurrently for both input and output. - void incrementInputRecursionDepth() { - if (recursion_limit_ < ++input_recursion_depth_) { - throw TProtocolException(TProtocolException::DEPTH_LIMIT); - } - } - void decrementInputRecursionDepth() { --input_recursion_depth_; } - - void incrementOutputRecursionDepth() { - if (recursion_limit_ < ++output_recursion_depth_) { - throw TProtocolException(TProtocolException::DEPTH_LIMIT); - } - } - void decrementOutputRecursionDepth() { --output_recursion_depth_; } - - uint32_t getRecursionLimit() const {return recursion_limit_;} - void setRecurisionLimit(uint32_t depth) {recursion_limit_ = depth;} - -protected: - TProtocol(std::shared_ptr ptrans) - : ptrans_(ptrans), input_recursion_depth_(0), output_recursion_depth_(0), recursion_limit_(DEFAULT_RECURSION_LIMIT) - {} - - std::shared_ptr ptrans_; - -private: - TProtocol() = default; - uint32_t input_recursion_depth_; - uint32_t output_recursion_depth_; - uint32_t recursion_limit_; -}; - -/** - * Constructs input and output protocol objects given transports. - */ -class TProtocolFactory { -public: - TProtocolFactory() = default; - - virtual ~TProtocolFactory(); - - virtual std::shared_ptr getProtocol(std::shared_ptr trans) = 0; - virtual std::shared_ptr getProtocol(std::shared_ptr inTrans, - std::shared_ptr outTrans) { - (void)outTrans; - return getProtocol(inTrans); - } -}; - -/** - * Dummy protocol class. - * - * This class does nothing, and should never be instantiated. - * It is used only by the generator code. - */ -class TDummyProtocol : public TProtocol {}; - - -// HM: this is sub-optimal since it creates a depencency even for memory-only struct -//// This is the default / legacy choice -//struct TNetworkBigEndian -//{ -// static uint16_t toWire16(uint16_t x) {return htons(x);} -// static uint32_t toWire32(uint32_t x) {return htonl(x);} -// static uint64_t toWire64(uint64_t x) {return THRIFT_htonll(x);} -// static uint16_t fromWire16(uint16_t x) {return ntohs(x);} -// static uint32_t fromWire32(uint32_t x) {return ntohl(x);} -// static uint64_t fromWire64(uint64_t x) {return THRIFT_ntohll(x);} -//}; -// -//// On most systems, this will be a bit faster than TNetworkBigEndian -//struct TNetworkLittleEndian -//{ -// static uint16_t toWire16(uint16_t x) {return THRIFT_htoles(x);} -// static uint32_t toWire32(uint32_t x) {return THRIFT_htolel(x);} -// static uint64_t toWire64(uint64_t x) {return THRIFT_htolell(x);} -// static uint16_t fromWire16(uint16_t x) {return THRIFT_letohs(x);} -// static uint32_t fromWire32(uint32_t x) {return THRIFT_letohl(x);} -// static uint64_t fromWire64(uint64_t x) {return THRIFT_letohll(x);} -//}; - -struct TOutputRecursionTracker { - TProtocol &prot_; - TOutputRecursionTracker(TProtocol &prot) : prot_(prot) { - prot_.incrementOutputRecursionDepth(); - } - ~TOutputRecursionTracker() { - prot_.decrementOutputRecursionDepth(); - } -}; - -struct TInputRecursionTracker { - TProtocol &prot_; - TInputRecursionTracker(TProtocol &prot) : prot_(prot) { - prot_.incrementInputRecursionDepth(); - } - ~TInputRecursionTracker() { - prot_.decrementInputRecursionDepth(); - } -}; - -/** - * Helper template for implementing TProtocol::skip(). - * - * Templatized to avoid having to make virtual function calls. - */ -template -uint32_t skip(Protocol_& prot, TType type) { - TInputRecursionTracker tracker(prot); - - switch (type) { - case T_BOOL: { - bool boolv; - return prot.readBool(boolv); - } - case T_BYTE: { - int8_t bytev = 0; - return prot.readByte(bytev); - } - case T_I16: { - int16_t i16; - return prot.readI16(i16); - } - case T_I32: { - int32_t i32; - return prot.readI32(i32); - } - case T_I64: { - int64_t i64; - return prot.readI64(i64); - } - case T_DOUBLE: { - double dub; - return prot.readDouble(dub); - } - case T_STRING: { - std::string str; - return prot.readBinary(str); - } - case T_STRUCT: { - uint32_t result = 0; - std::string name; - int16_t fid; - TType ftype; - result += prot.readStructBegin(name); - while (true) { - result += prot.readFieldBegin(name, ftype, fid); - if (ftype == T_STOP) { - break; - } - result += skip(prot, ftype); - result += prot.readFieldEnd(); - } - result += prot.readStructEnd(); - return result; - } - case T_MAP: { - uint32_t result = 0; - TType keyType; - TType valType; - uint32_t i, size; - result += prot.readMapBegin(keyType, valType, size); - for (i = 0; i < size; i++) { - result += skip(prot, keyType); - result += skip(prot, valType); - } - result += prot.readMapEnd(); - return result; - } - case T_SET: { - uint32_t result = 0; - TType elemType; - uint32_t i, size; - result += prot.readSetBegin(elemType, size); - for (i = 0; i < size; i++) { - result += skip(prot, elemType); - } - result += prot.readSetEnd(); - return result; - } - case T_LIST: { - uint32_t result = 0; - TType elemType; - uint32_t i, size; - result += prot.readListBegin(elemType, size); - for (i = 0; i < size; i++) { - result += skip(prot, elemType); - } - result += prot.readListEnd(); - return result; - } - default: - break; - } - - throw TProtocolException(TProtocolException::INVALID_DATA, - "invalid TType"); -} - -}}} // duckdb_apache::thrift::protocol - -#endif // #define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOL_H_ 1 diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TProtocolDecorator.h b/src/duckdb/third_party/thrift/thrift/protocol/TProtocolDecorator.h deleted file mode 100644 index 637377011..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TProtocolDecorator.h +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef THRIFT_TPROTOCOLDECORATOR_H_ -#define THRIFT_TPROTOCOLDECORATOR_H_ 1 - -#include "thrift/protocol/TProtocol.h" -#include -#include "duckdb/common/vector.hpp" - -namespace duckdb_apache { -namespace thrift { -namespace protocol { -using std::shared_ptr; - -/** - * TProtocolDecorator forwards all requests to an enclosed - * TProtocol instance, providing a way to author concise - * concrete decorator subclasses. - * - *

See p.175 of Design Patterns (by Gamma et al.)

- * - * @see duckdb_apache::thrift::protocol::TMultiplexedProtocol - */ -class TProtocolDecorator : public TProtocol { -public: - ~TProtocolDecorator() override = default; - - // Desc: Initializes the protocol decorator object. - TProtocolDecorator(shared_ptr proto) - : TProtocol(proto->getTransport()), protocol(proto) {} - - uint32_t writeMessageBegin_virt(const std::string& name, - const TMessageType messageType, - const int32_t seqid) override { - return protocol->writeMessageBegin(name, messageType, seqid); - } - uint32_t writeMessageEnd_virt() override { return protocol->writeMessageEnd(); } - uint32_t writeStructBegin_virt(const char* name) override { - return protocol->writeStructBegin(name); - } - uint32_t writeStructEnd_virt() override { return protocol->writeStructEnd(); } - - uint32_t writeFieldBegin_virt(const char* name, - const TType fieldType, - const int16_t fieldId) override { - return protocol->writeFieldBegin(name, fieldType, fieldId); - } - - uint32_t writeFieldEnd_virt() override { return protocol->writeFieldEnd(); } - uint32_t writeFieldStop_virt() override { return protocol->writeFieldStop(); } - - uint32_t writeMapBegin_virt(const TType keyType, - const TType valType, - const uint32_t size) override { - return protocol->writeMapBegin(keyType, valType, size); - } - - uint32_t writeMapEnd_virt() override { return protocol->writeMapEnd(); } - - uint32_t writeListBegin_virt(const TType elemType, const uint32_t size) override { - return protocol->writeListBegin(elemType, size); - } - uint32_t writeListEnd_virt() override { return protocol->writeListEnd(); } - - uint32_t writeSetBegin_virt(const TType elemType, const uint32_t size) override { - return protocol->writeSetBegin(elemType, size); - } - uint32_t writeSetEnd_virt() override { return protocol->writeSetEnd(); } - - uint32_t writeBool_virt(const bool value) override { return protocol->writeBool(value); } - uint32_t writeByte_virt(const int8_t byte) override { return protocol->writeByte(byte); } - uint32_t writeI16_virt(const int16_t i16) override { return protocol->writeI16(i16); } - uint32_t writeI32_virt(const int32_t i32) override { return protocol->writeI32(i32); } - uint32_t writeI64_virt(const int64_t i64) override { return protocol->writeI64(i64); } - - uint32_t writeDouble_virt(const double dub) override { return protocol->writeDouble(dub); } - uint32_t writeString_virt(const std::string& str) override { return protocol->writeString(str); } - uint32_t writeBinary_virt(const std::string& str) override { return protocol->writeBinary(str); } - - uint32_t readMessageBegin_virt(std::string& name, - TMessageType& messageType, - int32_t& seqid) override { - return protocol->readMessageBegin(name, messageType, seqid); - } - uint32_t readMessageEnd_virt() override { return protocol->readMessageEnd(); } - - uint32_t readStructBegin_virt(std::string& name) override { - return protocol->readStructBegin(name); - } - uint32_t readStructEnd_virt() override { return protocol->readStructEnd(); } - - uint32_t readFieldBegin_virt(std::string& name, TType& fieldType, int16_t& fieldId) override { - return protocol->readFieldBegin(name, fieldType, fieldId); - } - uint32_t readFieldEnd_virt() override { return protocol->readFieldEnd(); } - - uint32_t readMapBegin_virt(TType& keyType, TType& valType, uint32_t& size) override { - return protocol->readMapBegin(keyType, valType, size); - } - uint32_t readMapEnd_virt() override { return protocol->readMapEnd(); } - - uint32_t readListBegin_virt(TType& elemType, uint32_t& size) override { - return protocol->readListBegin(elemType, size); - } - uint32_t readListEnd_virt() override { return protocol->readListEnd(); } - - uint32_t readSetBegin_virt(TType& elemType, uint32_t& size) override { - return protocol->readSetBegin(elemType, size); - } - uint32_t readSetEnd_virt() override { return protocol->readSetEnd(); } - - uint32_t readBool_virt(bool& value) override { return protocol->readBool(value); } - uint32_t readBool_virt(duckdb::vector::reference value) override { - return protocol->readBool(value); - } - - uint32_t readByte_virt(int8_t& byte) override { return protocol->readByte(byte); } - - uint32_t readI16_virt(int16_t& i16) override { return protocol->readI16(i16); } - uint32_t readI32_virt(int32_t& i32) override { return protocol->readI32(i32); } - uint32_t readI64_virt(int64_t& i64) override { return protocol->readI64(i64); } - - uint32_t readDouble_virt(double& dub) override { return protocol->readDouble(dub); } - - uint32_t readString_virt(std::string& str) override { return protocol->readString(str); } - uint32_t readBinary_virt(std::string& str) override { return protocol->readBinary(str); } - -private: - shared_ptr protocol; -}; -} -} -} - -#endif // THRIFT_TPROTOCOLDECORATOR_H_ diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TProtocolException.h b/src/duckdb/third_party/thrift/thrift/protocol/TProtocolException.h deleted file mode 100644 index 1f4b963c2..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TProtocolException.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ 1 - -#include - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -/** - * Class to encapsulate all the possible types of protocol errors that may - * occur in various protocol systems. This provides a sort of generic - * wrapper around the vague UNIX E_ error codes that lets a common code - * base of error handling to be used for various types of protocols, i.e. - * pipes etc. - * - */ -class TProtocolException : public duckdb_apache::thrift::TException { -public: - /** - * Error codes for the various types of exceptions. - */ - enum TProtocolExceptionType { - UNKNOWN = 0, - INVALID_DATA = 1, - NEGATIVE_SIZE = 2, - SIZE_LIMIT = 3, - BAD_VERSION = 4, - NOT_IMPLEMENTED = 5, - DEPTH_LIMIT = 6 - }; - - TProtocolException() : duckdb_apache::thrift::TException(), type_(UNKNOWN) {} - - TProtocolException(TProtocolExceptionType type) : duckdb_apache::thrift::TException(), type_(type) {} - - TProtocolException(const std::string& message) - : duckdb_apache::thrift::TException(message), type_(UNKNOWN) {} - - TProtocolException(TProtocolExceptionType type, const std::string& message) - : duckdb_apache::thrift::TException(message), type_(type) {} - - ~TProtocolException() noexcept override = default; - - /** - * Returns an error code that provides information about the type of error - * that has occurred. - * - * @return Error code - */ - TProtocolExceptionType getType() const { return type_; } - - const char* what() const noexcept override { - if (message_.empty()) { - switch (type_) { - case UNKNOWN: - return "TProtocolException: Unknown protocol exception"; - case INVALID_DATA: - return "TProtocolException: Invalid data"; - case NEGATIVE_SIZE: - return "TProtocolException: Negative size"; - case SIZE_LIMIT: - return "TProtocolException: Exceeded size limit"; - case BAD_VERSION: - return "TProtocolException: Invalid version"; - case NOT_IMPLEMENTED: - return "TProtocolException: Not implemented"; - default: - return "TProtocolException: (Invalid exception type)"; - } - } else { - return message_.c_str(); - } - } - -protected: - /** - * Error code - */ - TProtocolExceptionType type_; -}; -} -} -} // duckdb_apache::thrift::protocol - -#endif // #ifndef _DUCKDB_THRIFT_PROTOCOL_TPROTOCOLEXCEPTION_H_ diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TProtocolTypes.h b/src/duckdb/third_party/thrift/thrift/protocol/TProtocolTypes.h deleted file mode 100644 index 4eaedd9d1..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TProtocolTypes.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef THRIFT_PROTOCOL_TPROTOCOLTYPES_H_ -#define THRIFT_PROTOCOL_TPROTOCOLTYPES_H_ 1 - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -enum PROTOCOL_TYPES { - T_BINARY_PROTOCOL = 0, - T_JSON_PROTOCOL = 1, - T_COMPACT_PROTOCOL = 2, -}; -} -} -} // duckdb_apache::thrift::protocol - -#endif // #define _THRIFT_PROTOCOL_TPROTOCOLTYPES_H_ 1 diff --git a/src/duckdb/third_party/thrift/thrift/protocol/TVirtualProtocol.h b/src/duckdb/third_party/thrift/thrift/protocol/TVirtualProtocol.h deleted file mode 100644 index 81c34c642..000000000 --- a/src/duckdb/third_party/thrift/thrift/protocol/TVirtualProtocol.h +++ /dev/null @@ -1,514 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ -#define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1 - -#include "thrift/protocol/TProtocol.h" -#include "duckdb/common/vector.hpp" - -namespace duckdb_apache { -namespace thrift { -namespace protocol { - -using duckdb_apache::thrift::transport::TTransport; - -/** - * Helper class that provides default implementations of TProtocol methods. - * - * This class provides default implementations of the non-virtual TProtocol - * methods. It exists primarily so TVirtualProtocol can derive from it. It - * prevents TVirtualProtocol methods from causing infinite recursion if the - * non-virtual methods are not overridden by the TVirtualProtocol subclass. - * - * You probably don't want to use this class directly. Use TVirtualProtocol - * instead. - */ -class TProtocolDefaults : public TProtocol { -public: - uint32_t readMessageBegin(std::string& name, TMessageType& messageType, int32_t& seqid) { - (void)name; - (void)messageType; - (void)seqid; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readMessageEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readStructBegin(std::string& name) { - (void)name; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readStructEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readFieldBegin(std::string& name, TType& fieldType, int16_t& fieldId) { - (void)name; - (void)fieldType; - (void)fieldId; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readFieldEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readMapBegin(TType& keyType, TType& valType, uint32_t& size) { - (void)keyType; - (void)valType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readMapEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readListBegin(TType& elemType, uint32_t& size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readListEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readSetBegin(TType& elemType, uint32_t& size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readSetEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readBool(bool& value) { - (void)value; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readBool(duckdb::vector::reference value) { - (void)value; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readByte(int8_t& byte) { - (void)byte; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readI16(int16_t& i16) { - (void)i16; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readI32(int32_t& i32) { - (void)i32; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readI64(int64_t& i64) { - (void)i64; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readDouble(double& dub) { - (void)dub; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readString(std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t readBinary(std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support reading (yet)."); - } - - uint32_t writeMessageBegin(const std::string& name, - const TMessageType messageType, - const int32_t seqid) { - (void)name; - (void)messageType; - (void)seqid; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeMessageEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeStructBegin(const char* name) { - (void)name; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeStructEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeFieldBegin(const char* name, const TType fieldType, const int16_t fieldId) { - (void)name; - (void)fieldType; - (void)fieldId; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeFieldEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeFieldStop() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeMapBegin(const TType keyType, const TType valType, const uint32_t size) { - (void)keyType; - (void)valType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeMapEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeListBegin(const TType elemType, const uint32_t size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeListEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeSetBegin(const TType elemType, const uint32_t size) { - (void)elemType; - (void)size; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeSetEnd() { - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeBool(const bool value) { - (void)value; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeByte(const int8_t byte) { - (void)byte; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeI16(const int16_t i16) { - (void)i16; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeI32(const int32_t i32) { - (void)i32; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeI64(const int64_t i64) { - (void)i64; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeDouble(const double dub) { - (void)dub; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeString(const std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t writeBinary(const std::string& str) { - (void)str; - throw TProtocolException(TProtocolException::NOT_IMPLEMENTED, - "this protocol does not support writing (yet)."); - } - - uint32_t skip(TType type) { return ::duckdb_apache::thrift::protocol::skip(*this, type); } - -protected: - TProtocolDefaults(std::shared_ptr ptrans) : TProtocol(ptrans) {} -}; - -/** - * Concrete TProtocol classes should inherit from TVirtualProtocol - * so they don't have to manually override virtual methods. - */ -template -class TVirtualProtocol : public Super_ { -public: - /** - * Writing functions. - */ - - uint32_t writeMessageBegin_virt(const std::string& name, - const TMessageType messageType, - const int32_t seqid) override { - return static_cast(this)->writeMessageBegin(name, messageType, seqid); - } - - uint32_t writeMessageEnd_virt() override { - return static_cast(this)->writeMessageEnd(); - } - - uint32_t writeStructBegin_virt(const char* name) override { - return static_cast(this)->writeStructBegin(name); - } - - uint32_t writeStructEnd_virt() override { return static_cast(this)->writeStructEnd(); } - - uint32_t writeFieldBegin_virt(const char* name, - const TType fieldType, - const int16_t fieldId) override { - return static_cast(this)->writeFieldBegin(name, fieldType, fieldId); - } - - uint32_t writeFieldEnd_virt() override { return static_cast(this)->writeFieldEnd(); } - - uint32_t writeFieldStop_virt() override { return static_cast(this)->writeFieldStop(); } - - uint32_t writeMapBegin_virt(const TType keyType, - const TType valType, - const uint32_t size) override { - return static_cast(this)->writeMapBegin(keyType, valType, size); - } - - uint32_t writeMapEnd_virt() override { return static_cast(this)->writeMapEnd(); } - - uint32_t writeListBegin_virt(const TType elemType, const uint32_t size) override { - return static_cast(this)->writeListBegin(elemType, size); - } - - uint32_t writeListEnd_virt() override { return static_cast(this)->writeListEnd(); } - - uint32_t writeSetBegin_virt(const TType elemType, const uint32_t size) override { - return static_cast(this)->writeSetBegin(elemType, size); - } - - uint32_t writeSetEnd_virt() override { return static_cast(this)->writeSetEnd(); } - - uint32_t writeBool_virt(const bool value) override { - return static_cast(this)->writeBool(value); - } - - uint32_t writeByte_virt(const int8_t byte) override { - return static_cast(this)->writeByte(byte); - } - - uint32_t writeI16_virt(const int16_t i16) override { - return static_cast(this)->writeI16(i16); - } - - uint32_t writeI32_virt(const int32_t i32) override { - return static_cast(this)->writeI32(i32); - } - - uint32_t writeI64_virt(const int64_t i64) override { - return static_cast(this)->writeI64(i64); - } - - uint32_t writeDouble_virt(const double dub) override { - return static_cast(this)->writeDouble(dub); - } - - uint32_t writeString_virt(const std::string& str) override { - return static_cast(this)->writeString(str); - } - - uint32_t writeBinary_virt(const std::string& str) override { - return static_cast(this)->writeBinary(str); - } - - /** - * Reading functions - */ - - uint32_t readMessageBegin_virt(std::string& name, - TMessageType& messageType, - int32_t& seqid) override { - return static_cast(this)->readMessageBegin(name, messageType, seqid); - } - - uint32_t readMessageEnd_virt() override { return static_cast(this)->readMessageEnd(); } - - uint32_t readStructBegin_virt(std::string& name) override { - return static_cast(this)->readStructBegin(name); - } - - uint32_t readStructEnd_virt() override { return static_cast(this)->readStructEnd(); } - - uint32_t readFieldBegin_virt(std::string& name, TType& fieldType, int16_t& fieldId) override { - return static_cast(this)->readFieldBegin(name, fieldType, fieldId); - } - - uint32_t readFieldEnd_virt() override { return static_cast(this)->readFieldEnd(); } - - uint32_t readMapBegin_virt(TType& keyType, TType& valType, uint32_t& size) override { - return static_cast(this)->readMapBegin(keyType, valType, size); - } - - uint32_t readMapEnd_virt() override { return static_cast(this)->readMapEnd(); } - - uint32_t readListBegin_virt(TType& elemType, uint32_t& size) override { - return static_cast(this)->readListBegin(elemType, size); - } - - uint32_t readListEnd_virt() override { return static_cast(this)->readListEnd(); } - - uint32_t readSetBegin_virt(TType& elemType, uint32_t& size) override { - return static_cast(this)->readSetBegin(elemType, size); - } - - uint32_t readSetEnd_virt() override { return static_cast(this)->readSetEnd(); } - - uint32_t readBool_virt(bool& value) override { - return static_cast(this)->readBool(value); - } - - uint32_t readBool_virt(duckdb::vector::reference value) override { - return static_cast(this)->readBool(value); - } - - uint32_t readByte_virt(int8_t& byte) override { - return static_cast(this)->readByte(byte); - } - - uint32_t readI16_virt(int16_t& i16) override { - return static_cast(this)->readI16(i16); - } - - uint32_t readI32_virt(int32_t& i32) override { - return static_cast(this)->readI32(i32); - } - - uint32_t readI64_virt(int64_t& i64) override { - return static_cast(this)->readI64(i64); - } - - uint32_t readDouble_virt(double& dub) override { - return static_cast(this)->readDouble(dub); - } - - uint32_t readString_virt(std::string& str) override { - return static_cast(this)->readString(str); - } - - uint32_t readBinary_virt(std::string& str) override { - return static_cast(this)->readBinary(str); - } - - uint32_t skip_virt(TType type) override { return static_cast(this)->skip(type); } - - /* - * Provide a default skip() implementation that uses non-virtual read - * methods. - * - * Note: subclasses that use TVirtualProtocol to derive from another protocol - * implementation (i.e., not TProtocolDefaults) should beware that this may - * override any non-default skip() implementation provided by the parent - * transport class. They may need to explicitly redefine skip() to call the - * correct parent implementation, if desired. - */ - uint32_t skip(TType type) { - auto* const prot = static_cast(this); - return ::duckdb_apache::thrift::protocol::skip(*prot, type); - } - - /* - * Provide a default readBool() implementation for use with - * duckdb::vector, that behaves the same as reading into a normal bool. - * - * Subclasses can override this if desired, but there normally shouldn't - * be a need to. - */ - uint32_t readBool(duckdb::vector::reference value) { - bool b = false; - uint32_t ret = static_cast(this)->readBool(b); - value = b; - return ret; - } - using Super_::readBool; // so we don't hide readBool(bool&) - -protected: - TVirtualProtocol(std::shared_ptr ptrans) : Super_(ptrans) {} -}; -} -} -} // duckdb_apache::thrift::protocol - -#endif // #define _DUCKDB_THRIFT_PROTOCOL_TVIRTUALPROTOCOL_H_ 1 diff --git a/src/duckdb/third_party/thrift/thrift/stdcxx.h b/src/duckdb/third_party/thrift/thrift/stdcxx.h deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/duckdb/third_party/thrift/thrift/thrift-config.h b/src/duckdb/third_party/thrift/thrift/thrift-config.h deleted file mode 100644 index 19e473cf6..000000000 --- a/src/duckdb/third_party/thrift/thrift/thrift-config.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef THRIFT_CONFIG_H -#define THRIFT_CONFIG_H - - -#ifdef _WIN32 -#if defined(_M_IX86) || defined(_M_X64) -#define ARITHMETIC_RIGHT_SHIFT 1 -#define SIGNED_RIGHT_SHIFT_IS 1 -#endif -#else -#define SIGNED_RIGHT_SHIFT_IS 1 -#define ARITHMETIC_RIGHT_SHIFT 1 -#endif - -#endif \ No newline at end of file diff --git a/src/duckdb/third_party/thrift/thrift/thrift_export.h b/src/duckdb/third_party/thrift/thrift/thrift_export.h deleted file mode 100644 index f5c059fb7..000000000 --- a/src/duckdb/third_party/thrift/thrift/thrift_export.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef THRIFT_EXPORT_H -#define THRIFT_EXPORT_H - -#ifdef THRIFT_STATIC_DEFINE -# define THRIFT_EXPORT -#elif defined(_MSC_VER ) -# ifndef THRIFT_EXPORT -# ifdef thrift_EXPORTS - /* We are building this library */ -# define THRIFT_EXPORT __declspec(dllexport) -# else - /* We are using this library */ -# define THRIFT_EXPORT __declspec(dllimport) -# endif -# endif -#else -# define THRIFT_EXPORT -#endif - -#endif /* THRIFT_EXPORT_H */ diff --git a/src/duckdb/third_party/thrift/thrift/transport/PlatformSocket.h b/src/duckdb/third_party/thrift/thrift/transport/PlatformSocket.h deleted file mode 100644 index c1b0c55ca..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/PlatformSocket.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -// clang-format off - -#ifndef _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_ -# define _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_ - -#ifdef _WIN32 -#ifdef _WINSOCKAPI_ -#undef _WINSOCKAPI_ -#endif -# include -# define THRIFT_GET_SOCKET_ERROR ::WSAGetLastError() -# define THRIFT_ERRNO (*_errno()) -# define THRIFT_EINPROGRESS WSAEINPROGRESS -# define THRIFT_EAGAIN WSAEWOULDBLOCK -# define THRIFT_EINTR WSAEINTR -# define THRIFT_ECONNRESET WSAECONNRESET -# define THRIFT_ENOTCONN WSAENOTCONN -# define THRIFT_ETIMEDOUT WSAETIMEDOUT -# define THRIFT_EWOULDBLOCK WSAEWOULDBLOCK -# define THRIFT_EPIPE WSAECONNRESET -# define THRIFT_NO_SOCKET_CACHING SO_EXCLUSIVEADDRUSE -# define THRIFT_SOCKET SOCKET -# define THRIFT_INVALID_SOCKET INVALID_SOCKET -# define THRIFT_SOCKETPAIR thrift_socketpair -# define THRIFT_FCNTL thrift_fcntl -# define THRIFT_O_NONBLOCK 1 -# define THRIFT_F_GETFL 0 -# define THRIFT_F_SETFL 1 -# define THRIFT_GETTIMEOFDAY thrift_gettimeofday -# define THRIFT_CLOSESOCKET closesocket -# define THRIFT_CLOSE _close -# define THRIFT_OPEN _open -# define THRIFT_FTRUNCATE _chsize_s -# define THRIFT_FSYNC _commit -# define THRIFT_LSEEK _lseek -# define THRIFT_WRITE _write -# define THRIFT_READ _read -# define THRIFT_IOCTL_SOCKET ioctlsocket -# define THRIFT_IOCTL_SOCKET_NUM_BYTES_TYPE u_long -# define THRIFT_FSTAT _fstat -# define THRIFT_STAT _stat -# ifdef _WIN32_WCE -# define THRIFT_GAI_STRERROR(...) thrift_wstr2str(gai_strerrorW(__VA_ARGS__)) -# else -# define THRIFT_GAI_STRERROR gai_strerrorA -# endif -# define THRIFT_SSIZET ptrdiff_t -# if (_MSC_VER < 1900) -# define THRIFT_SNPRINTF _snprintf -# else -# define THRIFT_SNPRINTF snprintf -# endif -# define THRIFT_SLEEP_SEC thrift_sleep -# define THRIFT_SLEEP_USEC thrift_usleep -# define THRIFT_TIMESPEC thrift_timespec -# define THRIFT_CTIME_R thrift_ctime_r -# define THRIFT_POLL thrift_poll -# if WINVER <= 0x0502 //XP, Server2003 -# define THRIFT_POLLFD thrift_pollfd -# define THRIFT_POLLIN 0x0300 -# define THRIFT_POLLOUT 0x0010 -# else //Vista, Win7... -# define THRIFT_POLLFD pollfd -# define THRIFT_POLLIN POLLIN -# define THRIFT_POLLOUT POLLOUT -# endif //WINVER -# define THRIFT_SHUT_RDWR SD_BOTH -# if !defined(AI_ADDRCONFIG) -# define AI_ADDRCONFIG 0x00000400 -# endif -#else //not _WIN32 -# include -# define THRIFT_GET_SOCKET_ERROR errno -# define THRIFT_ERRNO errno -# define THRIFT_EINTR EINTR -# define THRIFT_EINPROGRESS EINPROGRESS -# define THRIFT_ECONNRESET ECONNRESET -# define THRIFT_ENOTCONN ENOTCONN -# define THRIFT_ETIMEDOUT ETIMEDOUT -# define THRIFT_EWOULDBLOCK EWOULDBLOCK -# define THRIFT_EAGAIN EAGAIN -# define THRIFT_EPIPE EPIPE -# define THRIFT_NO_SOCKET_CACHING SO_REUSEADDR -# define THRIFT_SOCKET int -# define THRIFT_INVALID_SOCKET (-1) -# define THRIFT_SOCKETPAIR socketpair -# define THRIFT_FCNTL fcntl -# define THRIFT_O_NONBLOCK O_NONBLOCK -# define THRIFT_F_GETFL F_GETFL -# define THRIFT_F_SETFL F_SETFL -# define THRIFT_GETTIMEOFDAY gettimeofday -# define THRIFT_CLOSESOCKET close -# define THRIFT_CLOSE close -# define THRIFT_OPEN open -# define THRIFT_FTRUNCATE ftruncate -# define THRIFT_FSYNC fsync -# define THRIFT_LSEEK lseek -# define THRIFT_WRITE write -# define THRIFT_READ read -# define THRIFT_IOCTL_SOCKET ioctl -# define THRIFT_IOCTL_SOCKET_NUM_BYTES_TYPE int -# define THRIFT_STAT stat -# define THRIFT_FSTAT fstat -# define THRIFT_GAI_STRERROR gai_strerror -# define THRIFT_SSIZET ssize_t -# define THRIFT_SNPRINTF snprintf -# define THRIFT_SLEEP_SEC sleep -# define THRIFT_SLEEP_USEC usleep -# define THRIFT_TIMESPEC timespec -# define THRIFT_CTIME_R ctime_r -# define THRIFT_POLL poll -# define THRIFT_POLLFD pollfd -# define THRIFT_POLLIN POLLIN -# define THRIFT_POLLOUT POLLOUT -# define THRIFT_SHUT_RDWR SHUT_RDWR -#endif - -#endif // _DUCKDB_THRIFT_TRANSPORT_PLATFORM_SOCKET_H_ diff --git a/src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp b/src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp deleted file mode 100644 index 2b0e470d5..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include -#include -#include "thrift/thrift_export.h" - -#include "thrift/transport/TBufferTransports.h" - -using std::string; - -namespace duckdb_apache { -namespace thrift { -namespace transport { - - - - -void TMemoryBuffer::computeRead(uint32_t len, uint8_t** out_start, uint32_t* out_give) { - // Correct rBound_ so we can use the fast path in the future. - rBound_ = wBase_; - - // Decide how much to give. - uint32_t give = (std::min)(len, available_read()); - - *out_start = rBase_; - *out_give = give; - - // Preincrement rBase_ so the caller doesn't have to. - rBase_ += give; -} - -uint32_t TMemoryBuffer::readSlow(uint8_t* buf, uint32_t len) { - uint8_t* start; - uint32_t give; - computeRead(len, &start, &give); - - // Copy into the provided buffer. - memcpy(buf, start, give); - - return give; -} - -uint32_t TMemoryBuffer::readAppendToString(std::string& str, uint32_t len) { - // Don't get some stupid assertion failure. - if (buffer_ == nullptr) { - return 0; - } - - uint8_t* start; - uint32_t give; - computeRead(len, &start, &give); - - // Append to the provided string. - str.append((char*)start, give); - - return give; -} - -void TMemoryBuffer::ensureCanWrite(uint32_t len) { - // Check available space - uint32_t avail = available_write(); - if (len <= avail) { - return; - } - - if (!owner_) { - throw TTransportException("Insufficient space in external MemoryBuffer"); - } - - // Grow the buffer as necessary. - uint64_t new_size = bufferSize_; - while (len > avail) { - new_size = new_size > 0 ? new_size * 2 : 1; - if (new_size > maxBufferSize_) { - throw TTransportException(TTransportException::BAD_ARGS, - "Internal buffer size overflow"); - } - avail = available_write() + (static_cast(new_size) - bufferSize_); - } - - // Allocate into a new pointer so we don't bork ours if it fails. - auto* new_buffer = static_cast(std::realloc(buffer_, new_size)); - if (new_buffer == nullptr) { - throw std::bad_alloc(); - } - - rBase_ = new_buffer + (rBase_ - buffer_); - rBound_ = new_buffer + (rBound_ - buffer_); - wBase_ = new_buffer + (wBase_ - buffer_); - wBound_ = new_buffer + new_size; - buffer_ = new_buffer; - bufferSize_ = static_cast(new_size); -} - -void TMemoryBuffer::writeSlow(const uint8_t* buf, uint32_t len) { - ensureCanWrite(len); - - // Copy into the buffer and increment wBase_. - memcpy(wBase_, buf, len); - wBase_ += len; -} - -void TMemoryBuffer::wroteBytes(uint32_t len) { - uint32_t avail = available_write(); - if (len > avail) { - throw TTransportException("Client wrote more bytes than size of buffer."); - } - wBase_ += len; -} - -const uint8_t* TMemoryBuffer::borrowSlow(uint8_t* buf, uint32_t* len) { - (void)buf; - rBound_ = wBase_; - if (available_read() >= *len) { - *len = available_read(); - return rBase_; - } - return nullptr; -} -} -} -} // duckdb_apache::thrift::transport diff --git a/src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.h b/src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.h deleted file mode 100644 index 9381f3892..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.h +++ /dev/null @@ -1,488 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ 1 - -#include -#include -#include -#include - -#include "thrift/transport/TTransport.h" -#include "thrift/transport/TVirtualTransport.h" - -#ifdef __GNUC__ -#define TDB_LIKELY(val) (__builtin_expect((val), 1)) -#define TDB_UNLIKELY(val) (__builtin_expect((val), 0)) -#else -#define TDB_LIKELY(val) (val) -#define TDB_UNLIKELY(val) (val) -#endif - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Base class for all transports that use read/write buffers for performance. - * - * TBufferBase is designed to implement the fast-path "memcpy" style - * operations that work in the common case. It does so with small and - * (eventually) nonvirtual, inlinable methods. TBufferBase is an abstract - * class. Subclasses are expected to define the "slow path" operations - * that have to be done when the buffers are full or empty. - * - */ -class TBufferBase : public TVirtualTransport { - -public: - /** - * Fast-path read. - * - * When we have enough data buffered to fulfill the read, we can satisfy it - * with a single memcpy, then adjust our internal pointers. If the buffer - * is empty, we call out to our slow path, implemented by a subclass. - * This method is meant to eventually be nonvirtual and inlinable. - */ - uint32_t read(uint8_t* buf, uint32_t len) { - uint8_t* new_rBase = rBase_ + len; - if (TDB_LIKELY(new_rBase <= rBound_)) { - std::memcpy(buf, rBase_, len); - rBase_ = new_rBase; - return len; - } - return readSlow(buf, len); - } - - /** - * Shortcutted version of readAll. - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { - uint8_t* new_rBase = rBase_ + len; - if (TDB_LIKELY(new_rBase <= rBound_)) { - std::memcpy(buf, rBase_, len); - rBase_ = new_rBase; - return len; - } - return duckdb_apache::thrift::transport::readAll(*this, buf, len); - } - - /** - * Fast-path write. - * - * When we have enough empty space in our buffer to accommodate the write, we - * can satisfy it with a single memcpy, then adjust our internal pointers. - * If the buffer is full, we call out to our slow path, implemented by a - * subclass. This method is meant to eventually be nonvirtual and - * inlinable. - */ - void write(const uint8_t* buf, uint32_t len) { - uint8_t* new_wBase = wBase_ + len; - if (TDB_LIKELY(new_wBase <= wBound_)) { - std::memcpy(wBase_, buf, len); - wBase_ = new_wBase; - return; - } - writeSlow(buf, len); - } - - /** - * Fast-path borrow. A lot like the fast-path read. - */ - const uint8_t* borrow(uint8_t* buf, uint32_t* len) { - if (TDB_LIKELY(static_cast(*len) <= rBound_ - rBase_)) { - // With strict aliasing, writing to len shouldn't force us to - // refetch rBase_ from memory. TODO(dreiss): Verify this. - *len = static_cast(rBound_ - rBase_); - return rBase_; - } - return borrowSlow(buf, len); - } - - /** - * Consume doesn't require a slow path. - */ - void consume(uint32_t len) { - if (TDB_LIKELY(static_cast(len) <= rBound_ - rBase_)) { - rBase_ += len; - } else { - throw TTransportException(TTransportException::BAD_ARGS, "consume did not follow a borrow."); - } - } - -protected: - /// Slow path read. - virtual uint32_t readSlow(uint8_t* buf, uint32_t len) = 0; - - /// Slow path write. - virtual void writeSlow(const uint8_t* buf, uint32_t len) = 0; - - /** - * Slow path borrow. - * - * POSTCONDITION: return == NULL || rBound_ - rBase_ >= *len - */ - virtual const uint8_t* borrowSlow(uint8_t* buf, uint32_t* len) = 0; - - /** - * Trivial constructor. - * - * Initialize pointers safely. Constructing is not a very - * performance-sensitive operation, so it is okay to just leave it to - * the concrete class to set up pointers correctly. - */ - TBufferBase() : rBase_(nullptr), rBound_(nullptr), wBase_(nullptr), wBound_(nullptr) {} - - /// Convenience mutator for setting the read buffer. - void setReadBuffer(uint8_t* buf, uint32_t len) { - rBase_ = buf; - rBound_ = buf + len; - } - - /// Convenience mutator for setting the write buffer. - void setWriteBuffer(uint8_t* buf, uint32_t len) { - wBase_ = buf; - wBound_ = buf + len; - } - - ~TBufferBase() override = default; - - /// Reads begin here. - uint8_t* rBase_; - /// Reads may extend to just before here. - uint8_t* rBound_; - - /// Writes begin here. - uint8_t* wBase_; - /// Writes may extend to just before here. - uint8_t* wBound_; -}; - - -/** - * A memory buffer is a transport that simply reads from and writes to an - * in memory buffer. Anytime you call write on it, the data is simply placed - * into a buffer, and anytime you call read, data is read from that buffer. - * - * The buffers are allocated using C constructs malloc,realloc, and the size - * doubles as necessary. We've considered using scoped - * - */ -class TMemoryBuffer : public TVirtualTransport { -private: - // Common initialization done by all constructors. - void initCommon(uint8_t* buf, uint32_t size, bool owner, uint32_t wPos) { - - maxBufferSize_ = (std::numeric_limits::max)(); - - if (buf == nullptr && size != 0) { - assert(owner); - buf = (uint8_t*)std::malloc(size); - if (buf == nullptr) { - throw std::bad_alloc(); - } - } - - buffer_ = buf; - bufferSize_ = size; - - rBase_ = buffer_; - rBound_ = buffer_ + wPos; - // TODO(dreiss): Investigate NULL-ing this if !owner. - wBase_ = buffer_ + wPos; - wBound_ = buffer_ + bufferSize_; - - owner_ = owner; - - // rBound_ is really an artifact. In principle, it should always be - // equal to wBase_. We update it in a few places (computeRead, etc.). - } - -public: - static const uint32_t defaultSize = 1024; - - /** - * This enum specifies how a TMemoryBuffer should treat - * memory passed to it via constructors or resetBuffer. - * - * OBSERVE: - * TMemoryBuffer will simply store a pointer to the memory. - * It is the callers responsibility to ensure that the pointer - * remains valid for the lifetime of the TMemoryBuffer, - * and that it is properly cleaned up. - * Note that no data can be written to observed buffers. - * - * COPY: - * TMemoryBuffer will make an internal copy of the buffer. - * The caller has no responsibilities. - * - * TAKE_OWNERSHIP: - * TMemoryBuffer will become the "owner" of the buffer, - * and will be responsible for freeing it. - * The membory must have been allocated with malloc. - */ - enum MemoryPolicy { OBSERVE = 1, COPY = 2, TAKE_OWNERSHIP = 3 }; - - /** - * Construct a TMemoryBuffer with a default-sized buffer, - * owned by the TMemoryBuffer object. - */ - TMemoryBuffer() { initCommon(nullptr, defaultSize, true, 0); } - - /** - * Construct a TMemoryBuffer with a buffer of a specified size, - * owned by the TMemoryBuffer object. - * - * @param sz The initial size of the buffer. - */ - TMemoryBuffer(uint32_t sz) { initCommon(nullptr, sz, true, 0); } - - /** - * Construct a TMemoryBuffer with buf as its initial contents. - * - * @param buf The initial contents of the buffer. - * Note that, while buf is a non-const pointer, - * TMemoryBuffer will not write to it if policy == OBSERVE, - * so it is safe to const_cast(whatever). - * @param sz The size of @c buf. - * @param policy See @link MemoryPolicy @endlink . - */ - TMemoryBuffer(uint8_t* buf, uint32_t sz, MemoryPolicy policy = OBSERVE) { - if (buf == nullptr && sz != 0) { - throw TTransportException(TTransportException::BAD_ARGS, - "TMemoryBuffer given null buffer with non-zero size."); - } - - switch (policy) { - case OBSERVE: - case TAKE_OWNERSHIP: - initCommon(buf, sz, policy == TAKE_OWNERSHIP, sz); - break; - case COPY: - initCommon(nullptr, sz, true, 0); - this->write(buf, sz); - break; - default: - throw TTransportException(TTransportException::BAD_ARGS, - "Invalid MemoryPolicy for TMemoryBuffer"); - } - } - - ~TMemoryBuffer() override { - if (owner_) { - std::free(buffer_); - } - } - - bool isOpen() const override { return true; } - - bool peek() override { return (rBase_ < wBase_); } - - void open() override {} - - void close() override {} - - // TODO(dreiss): Make bufPtr const. - void getBuffer(uint8_t** bufPtr, uint32_t* sz) { - *bufPtr = rBase_; - *sz = static_cast(wBase_ - rBase_); - } - - std::string getBufferAsString() { - if (buffer_ == nullptr) { - return ""; - } - uint8_t* buf; - uint32_t sz; - getBuffer(&buf, &sz); - return std::string((char*)buf, (std::string::size_type)sz); - } - - void appendBufferToString(std::string& str) { - if (buffer_ == nullptr) { - return; - } - uint8_t* buf; - uint32_t sz; - getBuffer(&buf, &sz); - str.append((char*)buf, sz); - } - - void resetBuffer() { - rBase_ = buffer_; - rBound_ = buffer_; - wBase_ = buffer_; - // It isn't safe to write into a buffer we don't own. - if (!owner_) { - wBound_ = wBase_; - bufferSize_ = 0; - } - } - - /// See constructor documentation. - void resetBuffer(uint8_t* buf, uint32_t sz, MemoryPolicy policy = OBSERVE) { - // Use a variant of the copy-and-swap trick for assignment operators. - // This is sub-optimal in terms of performance for two reasons: - // 1/ The constructing and swapping of the (small) values - // in the temporary object takes some time, and is not necessary. - // 2/ If policy == COPY, we allocate the new buffer before - // freeing the old one, precluding the possibility of - // reusing that memory. - // I doubt that either of these problems could be optimized away, - // but the second is probably no a common case, and the first is minor. - // I don't expect resetBuffer to be a common operation, so I'm willing to - // bite the performance bullet to make the method this simple. - - // Construct the new buffer. - TMemoryBuffer new_buffer(buf, sz, policy); - // Move it into ourself. - this->swap(new_buffer); - // Our old self gets destroyed. - } - - /// See constructor documentation. - void resetBuffer(uint32_t sz) { - // Construct the new buffer. - TMemoryBuffer new_buffer(sz); - // Move it into ourself. - this->swap(new_buffer); - // Our old self gets destroyed. - } - - std::string readAsString(uint32_t len) { - std::string str; - (void)readAppendToString(str, len); - return str; - } - - uint32_t readAppendToString(std::string& str, uint32_t len); - - // return number of bytes read - uint32_t readEnd() override { - // This cast should be safe, because buffer_'s size is a uint32_t - auto bytes = static_cast(rBase_ - buffer_); - if (rBase_ == wBase_) { - resetBuffer(); - } - return bytes; - } - - // Return number of bytes written - uint32_t writeEnd() override { - // This cast should be safe, because buffer_'s size is a uint32_t - return static_cast(wBase_ - buffer_); - } - - uint32_t available_read() const { - // Remember, wBase_ is the real rBound_. - return static_cast(wBase_ - rBase_); - } - - uint32_t available_write() const { return static_cast(wBound_ - wBase_); } - - // Returns a pointer to where the client can write data to append to - // the TMemoryBuffer, and ensures the buffer is big enough to accommodate a - // write of the provided length. The returned pointer is very convenient for - // passing to read(), recv(), or similar. You must call wroteBytes() as soon - // as data is written or the buffer will not be aware that data has changed. - uint8_t* getWritePtr(uint32_t len) { - ensureCanWrite(len); - return wBase_; - } - - // Informs the buffer that the client has written 'len' bytes into storage - // that had been provided by getWritePtr(). - void wroteBytes(uint32_t len); - - /* - * TVirtualTransport provides a default implementation of readAll(). - * We want to use the TBufferBase version instead. - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { return TBufferBase::readAll(buf, len); } - - //! \brief Get the current buffer size - //! \returns the current buffer size - uint32_t getBufferSize() const { - return bufferSize_; - } - - //! \brief Get the current maximum buffer size - //! \returns the current maximum buffer size - uint32_t getMaxBufferSize() const { - return maxBufferSize_; - } - - //! \brief Change the maximum buffer size - //! \param[in] maxSize the new maximum buffer size allowed to grow to - //! \throws TTransportException(BAD_ARGS) if maxSize is less than the current buffer size - void setMaxBufferSize(uint32_t maxSize) { - if (maxSize < bufferSize_) { - throw TTransportException(TTransportException::BAD_ARGS, - "Maximum buffer size would be less than current buffer size"); - } - maxBufferSize_ = maxSize; - } - -protected: - void swap(TMemoryBuffer& that) { - using std::swap; - swap(buffer_, that.buffer_); - swap(bufferSize_, that.bufferSize_); - - swap(rBase_, that.rBase_); - swap(rBound_, that.rBound_); - swap(wBase_, that.wBase_); - swap(wBound_, that.wBound_); - - swap(owner_, that.owner_); - } - - // Make sure there's at least 'len' bytes available for writing. - void ensureCanWrite(uint32_t len); - - // Compute the position and available data for reading. - void computeRead(uint32_t len, uint8_t** out_start, uint32_t* out_give); - - uint32_t readSlow(uint8_t* buf, uint32_t len) override; - - void writeSlow(const uint8_t* buf, uint32_t len) override; - - const uint8_t* borrowSlow(uint8_t* buf, uint32_t* len) override; - - // Data buffer - uint8_t* buffer_; - - // Allocated buffer size - uint32_t bufferSize_; - - // Maximum allowed size - uint32_t maxBufferSize_; - - // Is this object the owner of the buffer? - bool owner_; - - // Don't forget to update constrctors, initCommon, and swap if - // you add new members. -}; -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TBUFFERTRANSPORTS_H_ diff --git a/src/duckdb/third_party/thrift/thrift/transport/TTransport.h b/src/duckdb/third_party/thrift/thrift/transport/TTransport.h deleted file mode 100644 index 8d5d86b16..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/TTransport.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ 1 - -#include "thrift/Thrift.h" -#include "thrift/transport/TTransportException.h" -#include -#include - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Helper template to hoist readAll implementation out of TTransport - */ -template -uint32_t readAll(Transport_& trans, uint8_t* buf, uint32_t len) { - uint32_t have = 0; - uint32_t get = 0; - - while (have < len) { - get = trans.read(buf + have, len - have); - if (get <= 0) { - throw TTransportException(TTransportException::END_OF_FILE, "No more data to read."); - } - have += get; - } - - return have; -} - -/** - * Generic interface for a method of transporting data. A TTransport may be - * capable of either reading or writing, but not necessarily both. - * - */ -class TTransport { -public: - /** - * Virtual deconstructor. - */ - virtual ~TTransport() = default; - - /** - * Whether this transport is open. - */ - virtual bool isOpen() const { return false; } - - /** - * Tests whether there is more data to read or if the remote side is - * still open. By default this is true whenever the transport is open, - * but implementations should add logic to test for this condition where - * possible (i.e. on a socket). - * This is used by a server to check if it should listen for another - * request. - */ - virtual bool peek() { return isOpen(); } - - /** - * Opens the transport for communications. - * - * @return bool Whether the transport was successfully opened - * @throws TTransportException if opening failed - */ - virtual void open() { - throw TTransportException(TTransportException::NOT_OPEN, "Cannot open base TTransport."); - } - - /** - * Closes the transport. - */ - virtual void close() { - throw TTransportException(TTransportException::NOT_OPEN, "Cannot close base TTransport."); - } - - /** - * Attempt to read up to the specified number of bytes into the string. - * - * @param buf Reference to the location to write the data - * @param len How many bytes to read - * @return How many bytes were actually read - * @throws TTransportException If an error occurs - */ - uint32_t read(uint8_t* buf, uint32_t len) { - T_VIRTUAL_CALL(); - return read_virt(buf, len); - } - virtual uint32_t read_virt(uint8_t* /* buf */, uint32_t /* len */) { - throw TTransportException(TTransportException::NOT_OPEN, "Base TTransport cannot read."); - } - - /** - * Reads the given amount of data in its entirety no matter what. - * - * @param s Reference to location for read data - * @param len How many bytes to read - * @return How many bytes read, which must be equal to size - * @throws TTransportException If insufficient data was read - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { - T_VIRTUAL_CALL(); - return readAll_virt(buf, len); - } - virtual uint32_t readAll_virt(uint8_t* buf, uint32_t len) { - return duckdb_apache::thrift::transport::readAll(*this, buf, len); - } - - /** - * Called when read is completed. - * This can be over-ridden to perform a transport-specific action - * e.g. logging the request to a file - * - * @return number of bytes read if available, 0 otherwise. - */ - virtual uint32_t readEnd() { - // default behaviour is to do nothing - return 0; - } - - /** - * Writes the string in its entirety to the buffer. - * - * Note: You must call flush() to ensure the data is actually written, - * and available to be read back in the future. Destroying a TTransport - * object does not automatically flush pending data--if you destroy a - * TTransport object with written but unflushed data, that data may be - * discarded. - * - * @param buf The data to write out - * @throws TTransportException if an error occurs - */ - void write(const uint8_t* buf, uint32_t len) { - T_VIRTUAL_CALL(); - write_virt(buf, len); - } - virtual void write_virt(const uint8_t* /* buf */, uint32_t /* len */) { - throw TTransportException(TTransportException::NOT_OPEN, "Base TTransport cannot write."); - } - - /** - * Called when write is completed. - * This can be over-ridden to perform a transport-specific action - * at the end of a request. - * - * @return number of bytes written if available, 0 otherwise - */ - virtual uint32_t writeEnd() { - // default behaviour is to do nothing - return 0; - } - - /** - * Flushes any pending data to be written. Typically used with buffered - * transport mechanisms. - * - * @throws TTransportException if an error occurs - */ - virtual void flush() { - // default behaviour is to do nothing - } - - /** - * Attempts to return a pointer to \c len bytes, possibly copied into \c buf. - * Does not consume the bytes read (i.e.: a later read will return the same - * data). This method is meant to support protocols that need to read - * variable-length fields. They can attempt to borrow the maximum amount of - * data that they will need, then consume (see next method) what they - * actually use. Some transports will not support this method and others - * will fail occasionally, so protocols must be prepared to use read if - * borrow fails. - * - * @oaram buf A buffer where the data can be stored if needed. - * If borrow doesn't return buf, then the contents of - * buf after the call are undefined. This parameter may be - * NULL to indicate that the caller is not supplying storage, - * but would like a pointer into an internal buffer, if - * available. - * @param len *len should initially contain the number of bytes to borrow. - * If borrow succeeds, *len will contain the number of bytes - * available in the returned pointer. This will be at least - * what was requested, but may be more if borrow returns - * a pointer to an internal buffer, rather than buf. - * If borrow fails, the contents of *len are undefined. - * @return If the borrow succeeds, return a pointer to the borrowed data. - * This might be equal to \c buf, or it might be a pointer into - * the transport's internal buffers. - * @throws TTransportException if an error occurs - */ - const uint8_t* borrow(uint8_t* buf, uint32_t* len) { - T_VIRTUAL_CALL(); - return borrow_virt(buf, len); - } - virtual const uint8_t* borrow_virt(uint8_t* /* buf */, uint32_t* /* len */) { return nullptr; } - - /** - * Remove len bytes from the transport. This should always follow a borrow - * of at least len bytes, and should always succeed. - * TODO(dreiss): Is there any transport that could borrow but fail to - * consume, or that would require a buffer to dump the consumed data? - * - * @param len How many bytes to consume - * @throws TTransportException If an error occurs - */ - void consume(uint32_t len) { - T_VIRTUAL_CALL(); - consume_virt(len); - } - virtual void consume_virt(uint32_t /* len */) { - throw TTransportException(TTransportException::NOT_OPEN, "Base TTransport cannot consume."); - } - - /** - * Returns the origin of the transports call. The value depends on the - * transport used. An IP based transport for example will return the - * IP address of the client making the request. - * If the transport doesn't know the origin Unknown is returned. - * - * The returned value can be used in a log message for example - */ - virtual const std::string getOrigin() const { return "Unknown"; } - -protected: - /** - * Simple constructor. - */ - TTransport() = default; -}; - -/** - * Generic factory class to make an input and output transport out of a - * source transport. Commonly used inside servers to make input and output - * streams out of raw clients. - * - */ -class TTransportFactory { -public: - TTransportFactory() = default; - - virtual ~TTransportFactory() = default; - - /** - * Default implementation does nothing, just returns the transport given. - */ - virtual std::shared_ptr getTransport(std::shared_ptr trans) { - return trans; - } -}; -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORT_H_ diff --git a/src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp b/src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp deleted file mode 100644 index de7431cc9..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#include "thrift/transport/TTransportException.h" -#include - -#include "thrift/thrift-config.h" - -using std::string; - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -const char* TTransportException::what() const noexcept { - if (message_.empty()) { - switch (type_) { - case UNKNOWN: - return "TTransportException: Unknown transport exception"; - case NOT_OPEN: - return "TTransportException: Transport not open"; - case TIMED_OUT: - return "TTransportException: Timed out"; - case END_OF_FILE: - return "TTransportException: End of file"; - case INTERRUPTED: - return "TTransportException: Interrupted"; - case BAD_ARGS: - return "TTransportException: Invalid arguments"; - case CORRUPTED_DATA: - return "TTransportException: Corrupted Data"; - case INTERNAL_ERROR: - return "TTransportException: Internal error"; - default: - return "TTransportException: (Invalid exception type)"; - } - } else { - return message_.c_str(); - } -} -} -} -} // duckdb_apache::thrift::transport diff --git a/src/duckdb/third_party/thrift/thrift/transport/TTransportException.h b/src/duckdb/third_party/thrift/thrift/transport/TTransportException.h deleted file mode 100644 index 1cfafa543..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/TTransportException.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ 1 - -#include -#include "thrift/Thrift.h" - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Class to encapsulate all the possible types of transport errors that may - * occur in various transport systems. This provides a sort of generic - * wrapper around the vague UNIX E_ error codes that lets a common code - * base of error handling to be used for various types of transports, i.e. - * pipes etc. - * - */ -class TTransportException : public duckdb_apache::thrift::TException { -public: - /** - * Error codes for the various types of exceptions. - */ - enum TTransportExceptionType { - UNKNOWN = 0, - NOT_OPEN = 1, - TIMED_OUT = 2, - END_OF_FILE = 3, - INTERRUPTED = 4, - BAD_ARGS = 5, - CORRUPTED_DATA = 6, - INTERNAL_ERROR = 7 - }; - - TTransportException() : duckdb_apache::thrift::TException(), type_(UNKNOWN) {} - - TTransportException(TTransportExceptionType type) : duckdb_apache::thrift::TException(), type_(type) {} - - TTransportException(const std::string& message) - : duckdb_apache::thrift::TException(message), type_(UNKNOWN) {} - - TTransportException(TTransportExceptionType type, const std::string& message) - : duckdb_apache::thrift::TException(message), type_(type) {} - - TTransportException(TTransportExceptionType type, const std::string& message, int errno_copy) - : duckdb_apache::thrift::TException(message), type_(type) {} - - ~TTransportException() noexcept override = default; - - /** - * Returns an error code that provides information about the type of error - * that has occurred. - * - * @return Error code - */ - TTransportExceptionType getType() const noexcept { return type_; } - - const char* what() const noexcept override; - -protected: - /** Just like strerror_r but returns a C++ string object. */ - std::string strerror_s(int errno_copy); - - /** Error code */ - TTransportExceptionType type_; -}; - -///** -// * Legacy code in transport implementations have overflow issues -// * that need to be enforced. -// */ -//template To safe_numeric_cast(From i) { -// try { -// return boost::numeric_cast(i); -// } -// catch (const std::bad_cast& bc) { -// throw TTransportException(TTransportException::CORRUPTED_DATA, -// bc.what()); -// } -//} - -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TTRANSPORTEXCEPTION_H_ diff --git a/src/duckdb/third_party/thrift/thrift/transport/TVirtualTransport.h b/src/duckdb/third_party/thrift/thrift/transport/TVirtualTransport.h deleted file mode 100644 index 3d70695a2..000000000 --- a/src/duckdb/third_party/thrift/thrift/transport/TVirtualTransport.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -#ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ -#define _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ 1 - -#include "thrift/transport/TTransport.h" - -namespace duckdb_apache { -namespace thrift { -namespace transport { - -/** - * Helper class that provides default implementations of TTransport methods. - * - * This class provides default implementations of read(), readAll(), write(), - * borrow() and consume(). - * - * In the TTransport base class, each of these methods simply invokes its - * virtual counterpart. This class overrides them to always perform the - * default behavior, without a virtual function call. - * - * The primary purpose of this class is to serve as a base class for - * TVirtualTransport, and prevent infinite recursion if one of its subclasses - * does not override the TTransport implementation of these methods. (Since - * TVirtualTransport::read_virt() calls read(), and TTransport::read() calls - * read_virt().) - */ -class TTransportDefaults : public TTransport { -public: - /* - * TTransport *_virt() methods provide reasonable default implementations. - * Invoke them non-virtually. - */ - uint32_t read(uint8_t* buf, uint32_t len) { return this->TTransport::read_virt(buf, len); } - uint32_t readAll(uint8_t* buf, uint32_t len) { return this->TTransport::readAll_virt(buf, len); } - void write(const uint8_t* buf, uint32_t len) { this->TTransport::write_virt(buf, len); } - const uint8_t* borrow(uint8_t* buf, uint32_t* len) { - return this->TTransport::borrow_virt(buf, len); - } - void consume(uint32_t len) { this->TTransport::consume_virt(len); } - -protected: - TTransportDefaults() = default; -}; - -/** - * Helper class to provide polymorphism for subclasses of TTransport. - * - * This class implements *_virt() methods of TTransport, to call the - * non-virtual versions of these functions in the proper subclass. - * - * To define your own transport class using TVirtualTransport: - * 1) Derive your subclass from TVirtualTransport - * e.g: class MyTransport : public TVirtualTransport { - * 2) Provide your own implementations of read(), readAll(), etc. - * These methods should be non-virtual. - * - * Transport implementations that need to use virtual inheritance when - * inheriting from TTransport cannot use TVirtualTransport. - * - * @author Chad Walters - */ -template -class TVirtualTransport : public Super_ { -public: - /* - * Implementations of the *_virt() functions, to call the subclass's - * non-virtual implementation function. - */ - uint32_t read_virt(uint8_t* buf, uint32_t len) override { - return static_cast(this)->read(buf, len); - } - - uint32_t readAll_virt(uint8_t* buf, uint32_t len) override { - return static_cast(this)->readAll(buf, len); - } - - void write_virt(const uint8_t* buf, uint32_t len) override { - static_cast(this)->write(buf, len); - } - - const uint8_t* borrow_virt(uint8_t* buf, uint32_t* len) override { - return static_cast(this)->borrow(buf, len); - } - - void consume_virt(uint32_t len) override { static_cast(this)->consume(len); } - - /* - * Provide a default readAll() implementation that invokes - * read() non-virtually. - * - * Note: subclasses that use TVirtualTransport to derive from another - * transport implementation (i.e., not TTransportDefaults) should beware that - * this may override any non-default readAll() implementation provided by - * the parent transport class. They may need to redefine readAll() to call - * the correct parent implementation, if desired. - */ - uint32_t readAll(uint8_t* buf, uint32_t len) { - auto* trans = static_cast(this); - return ::duckdb_apache::thrift::transport::readAll(*trans, buf, len); - } - -protected: - TVirtualTransport() = default; - - /* - * Templatized constructors, to allow arguments to be passed to the Super_ - * constructor. Currently we only support 0, 1, or 2 arguments, but - * additional versions can be added as needed. - */ - template - TVirtualTransport(Arg_ const& arg) - : Super_(arg) {} - - template - TVirtualTransport(Arg1_ const& a1, Arg2_ const& a2) - : Super_(a1, a2) {} -}; -} -} -} // duckdb_apache::thrift::transport - -#endif // #ifndef _DUCKDB_THRIFT_TRANSPORT_TVIRTUALTRANSPORT_H_ diff --git a/src/duckdb/third_party/zstd/common/entropy_common.cpp b/src/duckdb/third_party/zstd/common/entropy_common.cpp deleted file mode 100644 index bd41d220a..000000000 --- a/src/duckdb/third_party/zstd/common/entropy_common.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* ****************************************************************** - * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************* -* Dependencies -***************************************/ -#include "zstd/common/mem.h" -#include "zstd/common/error_private.h" /* ERR_*, ERROR */ -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" - -namespace duckdb_zstd { - -/*=== Version ===*/ -unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } - - -/*=== Error Management ===*/ -unsigned FSE_isError(size_t code) { return ERR_isError(code); } -const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } - -unsigned HUF_isError(size_t code) { return ERR_isError(code); } -const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } - - -/*-************************************************************** -* FSE NCount encoding-decoding -****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) -{ - const BYTE* const istart = (const BYTE*) headerBuffer; - const BYTE* const iend = istart + hbSize; - const BYTE* ip = istart; - int nbBits; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - if (hbSize < 4) { - /* This function only works when hbSize >= 4 */ - char buffer[4]; - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, headerBuffer, hbSize); - { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, - buffer, sizeof(buffer)); - if (FSE_isError(countSize)) return countSize; - if (countSize > hbSize) return ERROR(corruption_detected); - return countSize; - } } - assert(hbSize >= 4); - - /* init */ - memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ - bitStream = MEM_readLE32(ip); - nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1<1) & (charnum<=*maxSVPtr)) { - if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; - bitStream = MEM_readLE32(ip) >> bitCount; - } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { - assert((bitCount >> 3) <= 3); /* For first condition to work */ - ip += bitCount>>3; - bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; - } else { - bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; - int count; - - if ((bitStream & (threshold-1)) < (U32)max) { - count = bitStream & (threshold-1); - bitCount += nbBits-1; - } else { - count = bitStream & (2*threshold-1); - if (count >= threshold) count -= max; - bitCount += nbBits; - } - - count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ - normalizedCounter[charnum++] = (short)count; - previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; - } - - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { - ip += bitCount>>3; - bitCount &= 7; - } else { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ - if (remaining != 1) return ERROR(corruption_detected); - if (bitCount > 32) return ERROR(corruption_detected); - *maxSVPtr = charnum-1; - - ip += (bitCount+7)>>3; - return ip-istart; -} - - -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableX?() . -*/ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, - U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize) -{ - U32 weightTotal; - const BYTE* ip = (const BYTE*) src; - size_t iSize; - size_t oSize; - - if (!srcSize) return ERROR(srcSize_wrong); - iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ - - if (iSize >= 128) { /* special header */ - oSize = iSize - 127; - iSize = ((oSize+1)/2); - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - if (oSize >= hwSize) return ERROR(corruption_detected); - ip += 1; - { U32 n; - for (n=0; n> 4; - huffWeight[n+1] = ip[n/2] & 15; - } } } - else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ - if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ - if (FSE_isError(oSize)) return oSize; - } - - /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); - weightTotal = 0; - { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); - rankStats[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } } - if (weightTotal == 0) return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ - { U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; - if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; - } } - - /* check tree construction validity */ - if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ - - /* results */ - *nbSymbolsPtr = (U32)(oSize+1); - return iSize+1; -} - -} diff --git a/src/duckdb/third_party/zstd/common/error_private.cpp b/src/duckdb/third_party/zstd/common/error_private.cpp deleted file mode 100644 index 207ef006d..000000000 --- a/src/duckdb/third_party/zstd/common/error_private.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* The purpose of this file is to have a single list of error strings embedded in binary */ - -#include "zstd/common/error_private.h" - -namespace duckdb_zstd { - -const char* ERR_getErrorString(ERR_enum code) -{ -#ifdef ZSTD_STRIP_ERROR_STRINGS - (void)code; - return "Error strings stripped"; -#else - static const char* const notErrorCode = "Unspecified error code"; - switch( code ) - { - case PREFIX(no_error): return "No error detected"; - case PREFIX(GENERIC): return "Error (generic)"; - case PREFIX(prefix_unknown): return "Unknown frame descriptor"; - case PREFIX(version_unsupported): return "Version not supported"; - case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; - case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; - case PREFIX(corruption_detected): return "Corrupted block detected"; - case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; - case PREFIX(parameter_unsupported): return "Unsupported parameter"; - case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; - case PREFIX(init_missing): return "Context should be init first"; - case PREFIX(memory_allocation): return "Allocation error : not enough memory"; - case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; - case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; - case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; - case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; - case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; - case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; - case PREFIX(dictionary_wrong): return "Dictionary mismatch"; - case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; - case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; - case PREFIX(srcSize_wrong): return "Src size is incorrect"; - case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; - /* following error codes are not stable and may be removed or changed in a future version */ - case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; - case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; - case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; - case PREFIX(maxCode): - default: return notErrorCode; - } -#endif -} - -} diff --git a/src/duckdb/third_party/zstd/common/fse_decompress.cpp b/src/duckdb/third_party/zstd/common/fse_decompress.cpp deleted file mode 100644 index 8845948e0..000000000 --- a/src/duckdb/third_party/zstd/common/fse_decompress.cpp +++ /dev/null @@ -1,287 +0,0 @@ -/* ****************************************************************** - * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ -#include "zstd/common/bitstream.h" -#include "zstd/common/compiler.h" -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/error_private.h" - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ - - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -namespace duckdb_zstd { - -/* Function templates */ -FSE_DTable* FSE_createDTable (unsigned tableLog) -{ - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); -} - -void FSE_freeDTable (FSE_DTable* dt) -{ - free(dt); -} - -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ - FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; - - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; - - /* Sanity Checks */ - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - - /* Init, lay down lowprob symbols */ - { FSE_DTableHeader DTableH; - DTableH.tableLog = (U16)tableLog; - DTableH.fastMode = 1; - { S16 const largeLimit= (S16)(1 << (tableLog-1)); - U32 s; - for (s=0; s= largeLimit) DTableH.fastMode=0; - symbolNext[s] = normalizedCounter[s]; - } } } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - - /* Spread symbols */ - { U32 const tableMask = tableSize-1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } - - /* Build Decoding table */ - { U32 u; - for (u=0; utableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - - -size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) -{ - void* ptr = dt; - FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; - void* dPtr = dt + 1; - FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSV1 = tableMask+1; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[1] = FSE_GETSYMBOL(&state2); - - if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } - - op[2] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[3] = FSE_GETSYMBOL(&state2); - } - - /* tail */ - /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) { - if (op>(omax-2)) return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state1); - if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state2); - break; - } - - if (op>(omax-2)) return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state2); - if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state1); - break; - } } - - return op-ostart; -} - - -size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize, - const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ - if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - - -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) -{ - const BYTE* const istart = (const BYTE*)cSrc; - const BYTE* ip = istart; - short counting[FSE_MAX_SYMBOL_VALUE+1]; - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - - /* normal FSE decoding mode */ - size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(NCountLength)) return NCountLength; - /* if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); */ /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - ip += NCountLength; - cSrcSize -= NCountLength; - - CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); - - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ -} - - -typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; - -size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) -{ - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); -} - -} - -#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/src/duckdb/third_party/zstd/common/xxhash.cpp b/src/duckdb/third_party/zstd/common/xxhash.cpp deleted file mode 100644 index 9ec937522..000000000 --- a/src/duckdb/third_party/zstd/common/xxhash.cpp +++ /dev/null @@ -1,859 +0,0 @@ -/* - * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - xxHash homepage: http://www.xxhash.com - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -*/ - - -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ - defined(__ICCARM__) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : - */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ - -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. - * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. - */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif - -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; set to 0 when the input data - * is guaranteed to be aligned. - */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif - - -/* ************************************* -* Includes & Memory related functions -***************************************/ -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -#include /* size_t */ -/* for memcpy() */ -#include - -#include "zstd/common/xxhash.h" -#include "zstd/common/xxhash_static.h" - -/* ************************************* -* Compiler Specific Options -***************************************/ -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR - - -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */ -# endif -#endif - -namespace duckdb_zstd { -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -#else - -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 - */ - -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} - -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ - - -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -#if defined(__ICCARM__) -# include -# define XXH_rotl32(x,r) __ROR(x,(32 - r)) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif - -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif - - -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; - -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) -#endif - - -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; - -FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); -} - -FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} - -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} - -FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} - -FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} - -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} - - -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/* ************************************* -* Constants -***************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; - -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; - -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } - - -/* ************************** -* Utils -****************************/ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dstState, const XXH32_state_t* __restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dstState, const XXH64_state_t* __restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} - - -/* *************************** -* Simple Hash Functions -*****************************/ - -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; -} - -FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif - - if (len>=16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; - - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p<=limit); - - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } - - h32 += (U32) len; - - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_CREATESTATE_STATIC(state); - XXH32_reset(state, seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} - -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} - -FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif - - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - - } else { - h64 = seed + PRIME64_5; - } - - h64 += (U64) len; - - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_CREATESTATE_STATIC(state); - XXH64_reset(state, seed); - XXH64_update(state, input, len); - return XXH64_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} - - -/* ************************************************** -* Advanced Hash Functions -****************************************************/ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - - -/*** Hash feed ***/ - -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) -{ - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} - - -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} - - -FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } - - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; - - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH32_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; - U32 h32; - - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } - - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} - - - -/* **** XXH64 **** */ - -FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif - - state->total_len += len; - - if (state->memsize + len < 32) { /* fill in tmp buffer */ - if (input != NULL) { - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - } - state->memsize += (U32)len; - return XXH_OK; - } - - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } - - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; - - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } - - return XXH_OK; -} - -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} - - - -FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; - U64 h64; - - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; - - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 + PRIME64_5; - } - - h64 += (U64) state->total_len; - - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; - - return h64; -} - - -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} - - -/* ************************** -* Canonical representation -****************************/ - -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. -*/ - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); -} - -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} - -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} - -} diff --git a/src/duckdb/third_party/zstd/common/zstd_common.cpp b/src/duckdb/third_party/zstd/common/zstd_common.cpp deleted file mode 100644 index d7700be3b..000000000 --- a/src/duckdb/third_party/zstd/common/zstd_common.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - - -/*-************************************* -* Dependencies -***************************************/ -#include /* malloc, calloc, free */ -#include /* memset */ -#include "zstd/common/error_private.h" -#include "zstd/common/zstd_internal.h" - -namespace duckdb_zstd { - -/*-**************************************** -* Version -******************************************/ -unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } - -const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } - - -/*-**************************************** -* ZSTD Error Management -******************************************/ -#undef ZSTD_isError /* defined within zstd_internal.h */ -/*! ZSTD_isError() : - * tells if a return value is an error code - * symbol is required for external callers */ -unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } - -/*! ZSTD_getErrorName() : - * provides error code string from function result (useful for debugging) */ -const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } - -/*! ZSTD_getError() : - * convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } - -/*! ZSTD_getErrorString() : - * provides error code string from enum */ -const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } - - - -/*=************************************************************** -* Custom allocator -****************************************************************/ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) - return customMem.customAlloc(customMem.opaque, size); - return malloc(size); -} - -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) { - /* calloc implemented as malloc+memset; - * not as efficient as calloc, but next best guess for custom malloc */ - void* const ptr = customMem.customAlloc(customMem.opaque, size); - memset(ptr, 0, size); - return ptr; - } - return calloc(1, size); -} - -void ZSTD_free(void* ptr, ZSTD_customMem customMem) -{ - if (ptr!=NULL) { - if (customMem.customFree) - customMem.customFree(customMem.opaque, ptr); - else - free(ptr); - } -} - -} diff --git a/src/duckdb/third_party/zstd/compress/fse_compress.cpp b/src/duckdb/third_party/zstd/compress/fse_compress.cpp deleted file mode 100644 index 378e2925c..000000000 --- a/src/duckdb/third_party/zstd/compress/fse_compress.cpp +++ /dev/null @@ -1,700 +0,0 @@ -/* ****************************************************************** - * FSE : Finite State Entropy encoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ -#include "zstd/common/compiler.h" -#include "zstd/common/mem.h" /* U32, U16, etc. */ -#include "zstd/common/debug.h" /* assert, DEBUGLOG */ -#include "zstd/compress/hist.h" /* HIST_count_wksp */ -#include "zstd/common/bitstream.h" -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/error_private.h" - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define FSE_isError ERR_isError - - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -# error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -# error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X,Y) X##Y -#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) -#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) - -namespace duckdb_zstd { - -/* Function templates */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - U32 const step = FSE_TABLESTEP(tableSize); - U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; - - FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; - U32 highThreshold = tableSize-1; - - /* CTable header */ - if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); - tableU16[-2] = (U16) tableLog; - tableU16[-1] = (U16) maxSymbolValue; - assert(tableLog < 16); /* required for threshold strategy to work */ - - /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ - - #ifdef __clang_analyzer__ - memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ - #endif - - /* symbol start positions */ - { U32 u; - cumul[0] = 0; - for (u=1; u <= maxSymbolValue+1; u++) { - if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ - cumul[u] = cumul[u-1] + 1; - tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); - } else { - cumul[u] = cumul[u-1] + normalizedCounter[u-1]; - } } - cumul[maxSymbolValue+1] = tableSize+1; - } - - /* Spread symbols */ - { U32 position = 0; - U32 symbol; - for (symbol=0; symbol<=maxSymbolValue; symbol++) { - int nbOccurrences; - int const freq = normalizedCounter[symbol]; - for (nbOccurrences=0; nbOccurrences highThreshold) - position = (position + step) & tableMask; /* Low proba area */ - } } - - assert(position==0); /* Must have initialized all positions */ - } - - /* Build table */ - { U32 u; for (u=0; u> 3) + 3; - return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ -} - -static size_t -FSE_writeNCount_generic (void* header, size_t headerBufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, - unsigned writeIsSafe) -{ - BYTE* const ostart = (BYTE*) header; - BYTE* out = ostart; - BYTE* const oend = ostart + headerBufferSize; - int nbBits; - const int tableSize = 1 << tableLog; - int remaining; - int threshold; - U32 bitStream = 0; - int bitCount = 0; - unsigned symbol = 0; - unsigned const alphabetSize = maxSymbolValue + 1; - int previousIs0 = 0; - - /* Table Size */ - bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; - bitCount += 4; - - /* Init */ - remaining = tableSize+1; /* +1 for extra accuracy */ - threshold = tableSize; - nbBits = tableLog+1; - - while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ - if (previousIs0) { - unsigned start = symbol; - while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; - if (symbol == alphabetSize) break; /* incorrect distribution */ - while (symbol >= start+24) { - start+=24; - bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend-2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE) bitStream; - out[1] = (BYTE)(bitStream>>8); - out+=2; - bitStream>>=16; - } - while (symbol >= start+3) { - start+=3; - bitStream += 3 << bitCount; - bitCount += 2; - } - bitStream += (symbol-start) << bitCount; - bitCount += 2; - if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } } - { int count = normalizedCounter[symbol++]; - int const max = (2*threshold-1) - remaining; - remaining -= count < 0 ? -count : count; - count++; /* +1 for extra accuracy */ - if (count>=threshold) - count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ - bitStream += count << bitCount; - bitCount += nbBits; - bitCount -= (count>=1; } - } - if (bitCount>16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } } - - if (remaining != 1) - return ERROR(GENERIC); /* incorrect normalized distribution */ - assert(symbol <= alphabetSize); - - /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream>>8); - out+= (bitCount+7) /8; - - return (out-ostart); -} - - -size_t FSE_writeNCount (void* buffer, size_t bufferSize, - const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ - if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ - - if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); - - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); -} - - -/*-************************************************************** -* FSE Compression Code -****************************************************************/ - -FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) -{ - size_t size; - if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); - return (FSE_CTable*)malloc(size); -} - -void FSE_freeCTable (FSE_CTable* ct) { free(ct); } - -/* provides the minimum logSize to safely represent a distribution */ -static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) -{ - U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - return minBits; -} - -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) -{ - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - assert(srcSize > 1); /* Not supported, RLE should be used instead */ - if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ - if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; - if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; - return tableLog; -} - -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); -} - - -/* Secondary normalization method. - To be used when primary method fails. */ - -static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue) -{ - short const NOT_YET_ASSIGNED = -2; - U32 s; - U32 distributed = 0; - U32 ToDistribute; - - /* Init */ - U32 const lowThreshold = (U32)(total >> tableLog); - U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); - - for (s=0; s<=maxSymbolValue; s++) { - if (count[s] == 0) { - norm[s]=0; - continue; - } - if (count[s] <= lowThreshold) { - norm[s] = -1; - distributed++; - total -= count[s]; - continue; - } - if (count[s] <= lowOne) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } - - norm[s]=NOT_YET_ASSIGNED; - } - ToDistribute = (1 << tableLog) - distributed; - - if (ToDistribute == 0) - return 0; - - if ((total / ToDistribute) > lowOne) { - /* risk of rounding to zero */ - lowOne = (U32)((total * 3) / (ToDistribute * 2)); - for (s=0; s<=maxSymbolValue; s++) { - if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } } - ToDistribute = (1 << tableLog) - distributed; - } - - if (distributed == maxSymbolValue+1) { - /* all values are pretty poor; - probably incompressible data (should have already been detected); - find max, then give all remaining points to max */ - U32 maxV = 0, maxC = 0; - for (s=0; s<=maxSymbolValue; s++) - if (count[s] > maxC) { maxV=s; maxC=count[s]; } - norm[maxV] += (short)ToDistribute; - return 0; - } - - if (total == 0) { - /* all of the symbols were low enough for the lowOne or lowThreshold */ - for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) - if (norm[s] > 0) { ToDistribute--; norm[s]++; } - return 0; - } - - { U64 const vStepLog = 62 - tableLog; - U64 const mid = (1ULL << (vStepLog-1)) - 1; - U64 const rStep = ((((U64)1<> vStepLog); - U32 const sEnd = (U32)(end >> vStepLog); - U32 const weight = sEnd - sStart; - if (weight < 1) - return ERROR(GENERIC); - norm[s] = (short)weight; - tmpTotal = end; - } } } - - return 0; -} - - -size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t total, - unsigned maxSymbolValue) -{ - /* Sanity checks */ - if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; - if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ - if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - - { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; - U64 const scale = 62 - tableLog; - U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ - U64 const vStep = 1ULL<<(scale-20); - int stillToDistribute = 1<> tableLog); - - for (s=0; s<=maxSymbolValue; s++) { - if (count[s] == total) return 0; /* rle special case */ - if (count[s] == 0) { normalizedCounter[s]=0; continue; } - if (count[s] <= lowThreshold) { - normalizedCounter[s] = -1; - stillToDistribute--; - } else { - short proba = (short)((count[s]*step) >> scale); - if (proba<8) { - U64 restToBeat = vStep * rtbTable[proba]; - proba += (count[s]*step) - ((U64)proba< restToBeat; - } - if (proba > largestP) { largestP=proba; largest=s; } - normalizedCounter[s] = proba; - stillToDistribute -= proba; - } } - if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { - /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); - if (FSE_isError(errorCode)) return errorCode; - } - else normalizedCounter[largest] += (short)stillToDistribute; - } - -#if 0 - { /* Print Table (debug) */ - U32 s; - U32 nTotal = 0; - for (s=0; s<=maxSymbolValue; s++) - RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); - for (s=0; s<=maxSymbolValue; s++) - nTotal += abs(normalizedCounter[s]); - if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) return ERROR(GENERIC); /* min size */ - - /* header */ - tableU16[-2] = (U16) nbBits; - tableU16[-1] = (U16) maxSymbolValue; - - /* Build table */ - for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } - - /* 2 or 4 encoding per loop */ - while ( ip>istart ) { - - FSE_encodeSymbol(&bitC, &CState2, *--ip); - - if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ - FSE_FLUSHBITS(&bitC); - - FSE_encodeSymbol(&bitC, &CState1, *--ip); - - if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - } - - FSE_FLUSHBITS(&bitC); - } - - FSE_flushCState(&bitC, &CState2); - FSE_flushCState(&bitC, &CState1); - return BIT_closeCStream(&bitC); -} - -size_t FSE_compress_usingCTable (void* dst, size_t dstSize, - const void* src, size_t srcSize, - const FSE_CTable* ct) -{ - unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); - - if (fast) - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); - else - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); -} - - -size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } - -/* FSE_compress_wksp() : - * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` size must be `(1< not compressible */ - if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ - } - - tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); - - /* Write table description header */ - { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); - op += nc_err; - } - - /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); - if (cSize == 0) return 0; /* not enough space for compressed data */ - op += cSize; - } - - /* check compressibility */ - if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; - - return op-ostart; -} - -typedef struct { - FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; - BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; -} fseWkspMax_t; - -size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) -{ - fseWkspMax_t scratchBuffer; - DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ - if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); - return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); -} - -size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); -} - -} - -#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/src/duckdb/third_party/zstd/compress/hist.cpp b/src/duckdb/third_party/zstd/compress/hist.cpp deleted file mode 100644 index 0a3d04a00..000000000 --- a/src/duckdb/third_party/zstd/compress/hist.cpp +++ /dev/null @@ -1,187 +0,0 @@ -/* ****************************************************************** - * hist : Histogram functions - * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* --- dependencies --- */ -#include "zstd/common/mem.h" /* U32, BYTE, etc. */ -#include "zstd/common/debug.h" /* assert, DEBUGLOG */ -#include "zstd/common/error_private.h" /* ERROR */ -#include "zstd/compress/hist.h" - - -namespace duckdb_zstd { - -/* --- Error management --- */ -unsigned HIST_isError(size_t code) { return ERR_isError(code); } - -/*-************************************************************** - * Histogram functions - ****************************************************************/ -unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - const BYTE* ip = (const BYTE*)src; - const BYTE* const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned largestCount=0; - - memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); - if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } - - while (ip largestCount) largestCount = count[s]; - } - - return largestCount; -} - -typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; - -/* HIST_count_parallel_wksp() : - * store histogram into 4 intermediate tables, recombined at the end. - * this design makes better use of OoO cpus, - * and is noticeably faster when some values are heavily repeated. - * But it needs some additional workspace for intermediate tables. - * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32. - * @return : largest histogram frequency, - * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */ -static size_t HIST_count_parallel_wksp( - unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - HIST_checkInput_e check, - U32* const workSpace) -{ - const BYTE* ip = (const BYTE*)source; - const BYTE* const iend = ip+sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max=0; - U32* const Counting1 = workSpace; - U32* const Counting2 = Counting1 + 256; - U32* const Counting3 = Counting2 + 256; - U32* const Counting4 = Counting3 + 256; - - memset(workSpace, 0, 4*256*sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */ - - /* by stripes of 16 bytes */ - { U32 cached = MEM_read32(ip); ip += 4; - while (ip < iend-15) { - U32 c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - c = cached; cached = MEM_read32(ip); ip += 4; - Counting1[(BYTE) c ]++; - Counting2[(BYTE)(c>>8) ]++; - Counting3[(BYTE)(c>>16)]++; - Counting4[ c>>24 ]++; - } - ip-=4; - } - - /* finish last symbols */ - while (ipmaxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); - } } - - { U32 s; - if (maxSymbolValue > 255) maxSymbolValue = 255; - for (s=0; s<=maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) max = count[s]; - } } - - while (!count[maxSymbolValue]) maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* HIST_countFast_wksp() : - * Same as HIST_countFast(), but using an externally provided scratch buffer. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - void* workSpace, size_t workSpaceSize) -{ - if (sourceSize < 1500) /* heuristic threshold */ - return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); - return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); -} - -/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); -} - -/* HIST_count_wksp() : - * Same as HIST_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ -size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* source, size_t sourceSize, - void* workSpace, size_t workSpaceSize) -{ - if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); - if (*maxSymbolValuePtr < 255) - return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); - *maxSymbolValuePtr = 255; - return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); -} - -size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize) -{ - unsigned tmpCounters[HIST_WKSP_SIZE_U32]; - return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/huf_compress.cpp b/src/duckdb/third_party/zstd/compress/huf_compress.cpp deleted file mode 100644 index a7fa092e4..000000000 --- a/src/duckdb/third_party/zstd/compress/huf_compress.cpp +++ /dev/null @@ -1,801 +0,0 @@ -/* ****************************************************************** - * Huffman encoder, part of New Generation Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - -/* ************************************************************** -* Includes -****************************************************************/ -#include /* memcpy, memset */ -#include /* printf (debug) */ -#include "zstd/common/compiler.h" -#include "zstd/common/bitstream.h" -#include "zstd/compress/hist.h" -#include "zstd/common/fse.h" /* header compression */ -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/common/error_private.h" - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define HUF_isError ERR_isError -#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ - - -namespace duckdb_zstd { -/* ************************************************************** -* Utils -****************************************************************/ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -} - - -/* ******************************************************* -* HUF : Huffman block compression -*********************************************************/ -/* HUF_compressWeights() : - * Same as FSE_compress(), but dedicated to huff0's weights compression. - * The use case needs much less stack memory. - * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. - */ -#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) -{ - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const oend = ostart + dstSize; - - unsigned maxSymbolValue = HUF_TABLELOG_MAX; - U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - - FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; - BYTE scratchBuffer[1< not compressible */ - } - - tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); - - /* Write table description header */ - { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) ); - op += hSize; - } - - /* Compress */ - CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); - { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) ); - if (cSize == 0) return 0; /* not enough space for compressed data */ - op += cSize; - } - - return (size_t)(op-ostart); -} - - -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt within "zstd/common/huf.h" */ - -/*! HUF_writeCTable() : - `CTable` : Huffman tree to save, using huf representation. - @return : size of saved CTable */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, - const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) -{ - BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ - BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; - BYTE* op = (BYTE*)dst; - U32 n; - - /* check conditions */ - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - - /* convert to weight */ - bitsToWeight[0] = 0; - for (n=1; n1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ - op[0] = (BYTE)hSize; - return hSize+1; - } } - - /* write raw values as 4-bits (max : 15) */ - if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ - if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ - op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ - for (n=0; n HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); - - /* Prepare base value per rank */ - { U32 n, nextRankStart = 0; - for (n=1; n<=tableLog; n++) { - U32 current = nextRankStart; - nextRankStart += (rankVal[n] << (n-1)); - rankVal[n] = current; - } } - - /* fill nbBits */ - *hasZeroWeights = 0; - { U32 n; for (n=0; nn=tableLog+1 */ - U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; - { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - /* assign value within rank, symbol order */ - { U32 n; for (n=0; n maxNbBits */ - - /* there are several too large elements (at least >= 2) */ - { int totalCost = 0; - const U32 baseCost = 1 << (largestBits - maxNbBits); - int n = (int)lastNonNull; - - while (huffNode[n].nbBits > maxNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; - n --; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ - - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ - - /* repay normalized cost */ - { U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_TABLELOG_MAX+2]; - - /* Get pos of last (smallest) symbol per rank */ - memset(rankLast, 0xF0, sizeof(rankLast)); - { U32 currentNbBits = maxNbBits; - int pos; - for (pos=n ; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currentNbBits) continue; - currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits-currentNbBits] = (U32)pos; - } } - - while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; - for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 const highPos = rankLast[nBitsToDecrease]; - U32 const lowPos = rankLast[nBitsToDecrease-1]; - if (highPos == noSymbol) continue; - if (lowPos == noSymbol) break; - { U32 const highTotal = huffNode[highPos].count; - U32 const lowTotal = 2 * huffNode[lowPos].count; - if (highTotal <= lowTotal) break; - } } - /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ - /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ - while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) - nBitsToDecrease ++; - totalCost -= 1 << (nBitsToDecrease-1); - if (rankLast[nBitsToDecrease-1] == noSymbol) - rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ - huffNode[rankLast[nBitsToDecrease]].nbBits ++; - if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } } /* while (totalCost > 0) */ - - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ - while (huffNode[n].nbBits == maxNbBits) n--; - huffNode[n+1].nbBits--; - assert(n >= 0); - rankLast[1] = (U32)(n+1); - totalCost++; - continue; - } - huffNode[ rankLast[1] + 1 ].nbBits--; - rankLast[1]++; - totalCost ++; - } } } /* there are several too large elements (at least >= 2) */ - - return maxNbBits; -} - -typedef struct { - U32 base; - U32 current; -} rankPos; - -typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; - -#define RANK_POSITION_TABLE_SIZE 32 - -typedef struct { - huffNodeTable huffNodeTbl; - rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; -} HUF_buildCTable_wksp_tables; - -static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) -{ - U32 n; - - memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); - for (n=0; n<=maxSymbolValue; n++) { - U32 r = BIT_highbit32(count[n] + 1); - rankPosition[r].base ++; - } - for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base; - for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base; - for (n=0; n<=maxSymbolValue; n++) { - U32 const c = count[n]; - U32 const r = BIT_highbit32(c+1) + 1; - U32 pos = rankPosition[r].current++; - while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { - huffNode[pos] = huffNode[pos-1]; - pos--; - } - huffNode[pos].count = c; - huffNode[pos].byte = (BYTE)n; - } -} - - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). - */ -#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) - -size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) -{ - HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace; - nodeElt* const huffNode0 = wksp_tables->huffNodeTbl; - nodeElt* const huffNode = huffNode0+1; - int nonNullRank; - int lowS, lowN; - int nodeNb = STARTNODE; - int n, nodeRoot; - - /* safety checks */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) - return ERROR(workSpace_tooSmall); - if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(maxSymbolValue_tooLarge); - memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); - - /* init for parents */ - nonNullRank = (int)maxSymbolValue; - while(huffNode[nonNullRank].count == 0) nonNullRank--; - lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; - huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; - nodeNb++; lowS-=2; - for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); - huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ - - /* create parents */ - while (nodeNb <= nodeRoot) { - int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; - nodeNb++; - } - - /* distribute weights (unlimited tree height) */ - huffNode[nodeRoot].nbBits = 0; - for (n=nodeRoot-1; n>=STARTNODE; n--) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - for (n=0; n<=nonNullRank; n++) - huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; - - /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); - - /* fill result into tree (val, nbBits) */ - { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; - int const alphabetSize = (int)(maxSymbolValue + 1); - if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ - for (n=0; n<=nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - /* determine stating value per rank */ - { U16 min = 0; - for (n=(int)maxNbBits; n>0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } } - for (n=0; n> 3; -} - -int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { - int bad = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - bad |= (count[s] != 0) & (CTable[s].nbBits == 0); - } - return !bad; -} - -size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } - -FORCE_INLINE_TEMPLATE void -HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) -{ - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); -} - -#define HUF_FLUSHBITS(s) BIT_flushBits(s) - -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) - -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) - -FORCE_INLINE_TEMPLATE size_t -HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - const BYTE* ip = (const BYTE*) src; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - size_t n; - BIT_CStream_t bitC; - - /* init */ - if (dstSize < 8) return 0; /* not enough space to compress */ - { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); - if (HUF_isError(initErr)) return 0; } - - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) - { - case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); - HUF_FLUSHBITS_2(&bitC); - /* fall-through */ - case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); - HUF_FLUSHBITS_1(&bitC); - /* fall-through */ - case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); - HUF_FLUSHBITS(&bitC); - /* fall-through */ - case 0 : /* fall-through */ - default: break; - } - - for (; n>0; n-=4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n- 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n- 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return BIT_closeCStream(&bitC); -} - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -static size_t -HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -static size_t -HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) -{ - if (bmi2) { - return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); - } - return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); -} - -#else - -static size_t -HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, const int bmi2) -{ - (void)bmi2; - return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); -} - -#endif - -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - - -static size_t -HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, - const void* src, size_t srcSize, - const HUF_CElt* CTable, int bmi2) -{ - size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - - if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ - if (srcSize < 12) return 0; /* no saving possible : too small input */ - op += 6; /* jumpTable */ - - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart+2, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); - if (cSize==0) return 0; - assert(cSize <= 65535); - MEM_writeLE16(ostart+4, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - assert(op <= oend); - assert(ip <= iend); - { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); - if (cSize==0) return 0; - op += cSize; - } - - return (size_t)(op-ostart); -} - -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) -{ - return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); -} - -typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; - -static size_t HUF_compressCTable_internal( - BYTE* const ostart, BYTE* op, BYTE* const oend, - const void* src, size_t srcSize, - HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) -{ - size_t const cSize = (nbStreams==HUF_singleStream) ? - HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : - HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); - if (HUF_isError(cSize)) { return cSize; } - if (cSize==0) { return 0; } /* uncompressible */ - op += cSize; - /* check compressibility */ - assert(op >= ostart); - if ((size_t)(op-ostart) >= srcSize-1) { return 0; } - return (size_t)(op-ostart); -} - -typedef struct { - unsigned count[HUF_SYMBOLVALUE_MAX + 1]; - HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; - HUF_buildCTable_wksp_tables buildCTable_wksp; -} HUF_compress_tables_t; - -/* HUF_compress_internal() : - * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ -static size_t -HUF_compress_internal (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - HUF_nbStreams_e nbStreams, - void* workSpace, size_t wkspSize, - HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, - const int bmi2) -{ - HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart; - - HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); - - /* checks & inits */ - if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ - if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); - if (!srcSize) return 0; /* Uncompressed */ - if (!dstSize) return 0; /* cannot fit anything within dst budget */ - if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ - if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); - if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; - if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; - - /* Heuristic : If old table is valid, use it for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } - - /* Scan input and build symbol stats */ - { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); - if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ - if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ - } - - /* Check validity of previous table */ - if ( repeat - && *repeat == HUF_repeat_check - && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } - - /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, - maxSymbolValue, huffLog, - &table->buildCTable_wksp, sizeof(table->buildCTable_wksp)); - CHECK_F(maxBits); - huffLog = (U32)maxBits; - /* Zero unused symbols in CTable, so we can check it for validity */ - memset(table->CTable + (maxSymbolValue + 1), 0, - sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); - } - - /* Write table description header */ - { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); - /* Check if using previous huffman table is beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); - size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, oldHufTable, bmi2); - } } - - /* Use the new huffman table */ - if (hSize + 12ul >= srcSize) { return 0; } - op += hSize; - if (repeat) { *repeat = HUF_repeat_none; } - if (oldHufTable) - memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ - } - return HUF_compressCTable_internal(ostart, op, oend, - src, srcSize, - nbStreams, table->CTable, bmi2); -} - - -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); -} - -size_t HUF_compress1X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_singleStream, - workSpace, wkspSize, hufTable, - repeat, preferRepeat, bmi2); -} - -size_t HUF_compress1X (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - -/* HUF_compress4X_repeat(): - * compress input using 4 streams. - * provide workspace to generate compression tables */ -size_t HUF_compress4X_wksp (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - NULL, NULL, 0, 0 /*bmi2*/); -} - -/* HUF_compress4X_repeat(): - * compress input using 4 streams. - * re-use an existing huffman compression table */ -size_t HUF_compress4X_repeat (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog, - void* workSpace, size_t wkspSize, - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, - maxSymbolValue, huffLog, HUF_fourStreams, - workSpace, wkspSize, - hufTable, repeat, preferRepeat, bmi2); -} - -size_t HUF_compress2 (void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned huffLog) -{ - unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; - return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); -} - -size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) -{ - return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_compress.cpp b/src/duckdb/third_party/zstd/compress/zstd_compress.cpp deleted file mode 100644 index 649e53571..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +++ /dev/null @@ -1,4283 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/*-************************************* -* Dependencies -***************************************/ -#include /* INT_MAX */ -#include /* memset */ -#include "zstd/common/mem.h" -#include "zstd/compress/hist.h" /* HIST_countFast_wksp */ -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/compress/zstd_compress_internal.h" -#include "zstd/compress/zstd_compress_sequences.h" -#include "zstd/compress/zstd_compress_literals.h" -#include "zstd/compress/zstd_fast.h" -#include "zstd/compress/zstd_double_fast.h" -#include "zstd/compress/zstd_lazy.h" -#include "zstd/compress/zstd_opt.h" -#include "zstd/compress/zstd_ldm.h" -#include "zstd/compress/zstd_compress_superblock.h" - -#if defined (MEMORY_SANITIZER) -#include -#endif - -namespace duckdb_zstd { -/*-************************************* -* Helper functions -***************************************/ -/* ZSTD_compressBound() - * Note that the result from this function is only compatible with the "normal" - * full-block strategy. - * When there are a lot of small blocks due to frequent flush in streaming mode - * the overhead of headers can make the compressed data to be larger than the - * return value of ZSTD_compressBound(). - */ -size_t ZSTD_compressBound(size_t srcSize) { - return ZSTD_COMPRESSBOUND(srcSize); -} - - -/*-************************************* -* Context memory management -***************************************/ -struct ZSTD_CDict_s { - const void* dictContent; - size_t dictContentSize; - U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ - ZSTD_cwksp workspace; - ZSTD_matchState_t matchState; - ZSTD_compressedBlockState_t cBlockState; - ZSTD_customMem customMem; - U32 dictID; - int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ -}; /* typedef'd to ZSTD_CDict within "zstd.h" */ - -ZSTD_CCtx* ZSTD_createCCtx(void) -{ - return ZSTD_createCCtx_advanced({NULL, NULL, NULL}); -} - -static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) -{ - assert(cctx != NULL); - memset(cctx, 0, sizeof(*cctx)); - cctx->customMem = memManager; - cctx->bmi2 = 0; - { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); - assert(!ZSTD_isError(err)); - (void)err; - } -} - -ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) -{ - ZSTD_STATIC_ASSERT(zcss_init==0); - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); - if (!cctx) return NULL; - ZSTD_initCCtx(cctx, customMem); - return cctx; - } -} - -ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) -{ - ZSTD_cwksp ws; - ZSTD_CCtx* cctx; - if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ - if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - - cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); - if (cctx == NULL) return NULL; - - memset(cctx, 0, sizeof(ZSTD_CCtx)); - ZSTD_cwksp_move(&cctx->workspace, &ws); - cctx->staticSize = workspaceSize; - - /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ - if (!ZSTD_cwksp_check_available(&cctx->workspace, HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; - cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); - cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, HUF_WORKSPACE_SIZE); - cctx->bmi2 = 0; - return cctx; -} - -/** - * Clears and frees all of the dictionaries in the CCtx. - */ -static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) -{ - ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); - ZSTD_freeCDict(cctx->localDict.cdict); - memset(&cctx->localDict, 0, sizeof(cctx->localDict)); - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); - cctx->cdict = NULL; -} - -static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) -{ - size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; - size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); - return bufferSize + cdictSize; -} - -static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) -{ - assert(cctx != NULL); - assert(cctx->staticSize == 0); - ZSTD_clearAllDicts(cctx); -#ifdef ZSTD_MULTITHREAD - ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; -#endif - ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); -} - -size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) -{ - if (cctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "not compatible with static CCtx"); - { - int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); - ZSTD_freeCCtxContent(cctx); - if (!cctxInWorkspace) { - ZSTD_free(cctx, cctx->customMem); - } - } - return 0; -} - - -static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - return ZSTDMT_sizeof_CCtx(cctx->mtctx); -#else - (void)cctx; - return 0; -#endif -} - - -size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) -{ - if (cctx==NULL) return 0; /* support sizeof on NULL */ - /* cctx may be in the workspace */ - return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) - + ZSTD_cwksp_sizeof(&cctx->workspace) - + ZSTD_sizeof_localDict(cctx->localDict) - + ZSTD_sizeof_mtctx(cctx); -} - -size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) -{ - return ZSTD_sizeof_CCtx(zcs); /* same object */ -} - -/* private API call, for dictBuilder only */ -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } - -static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( - ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params cctxParams; - memset(&cctxParams, 0, sizeof(cctxParams)); - cctxParams.cParams = cParams; - cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - assert(!ZSTD_checkCParams(cParams)); - cctxParams.fParams.contentSizeFlag = 1; - return cctxParams; -} - -static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( - ZSTD_customMem customMem) -{ - ZSTD_CCtx_params* params; - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - params = (ZSTD_CCtx_params*)ZSTD_calloc( - sizeof(ZSTD_CCtx_params), customMem); - if (!params) { return NULL; } - params->customMem = customMem; - params->compressionLevel = ZSTD_CLEVEL_DEFAULT; - params->fParams.contentSizeFlag = 1; - return params; -} - -ZSTD_CCtx_params* ZSTD_createCCtxParams(void) -{ - return ZSTD_createCCtxParams_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) -{ - if (params == NULL) { return 0; } - ZSTD_free(params, params->customMem); - return 0; -} - -size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) -{ - return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); -} - -size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { - RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - memset(cctxParams, 0, sizeof(*cctxParams)); - cctxParams->compressionLevel = compressionLevel; - cctxParams->fParams.contentSizeFlag = 1; - return 0; -} - -size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) -{ - RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - memset(cctxParams, 0, sizeof(*cctxParams)); - assert(!ZSTD_checkCParams(params.cParams)); - cctxParams->cParams = params.cParams; - cctxParams->fParams = params.fParams; - cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return 0; -} - -/* ZSTD_assignParamsToCCtxParams() : - * params is presumed valid at this stage */ -static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams( - const ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) -{ - ZSTD_CCtx_params ret = *cctxParams; - assert(!ZSTD_checkCParams(params->cParams)); - ret.cParams = params->cParams; - ret.fParams = params->fParams; - ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */ - return ret; -} - -ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) -{ - ZSTD_bounds bounds = { 0, 0, 0 }; - - switch(param) - { - case ZSTD_c_compressionLevel: - bounds.lowerBound = ZSTD_minCLevel(); - bounds.upperBound = ZSTD_maxCLevel(); - return bounds; - - case ZSTD_c_windowLog: - bounds.lowerBound = ZSTD_WINDOWLOG_MIN; - bounds.upperBound = ZSTD_WINDOWLOG_MAX; - return bounds; - - case ZSTD_c_hashLog: - bounds.lowerBound = ZSTD_HASHLOG_MIN; - bounds.upperBound = ZSTD_HASHLOG_MAX; - return bounds; - - case ZSTD_c_chainLog: - bounds.lowerBound = ZSTD_CHAINLOG_MIN; - bounds.upperBound = ZSTD_CHAINLOG_MAX; - return bounds; - - case ZSTD_c_searchLog: - bounds.lowerBound = ZSTD_SEARCHLOG_MIN; - bounds.upperBound = ZSTD_SEARCHLOG_MAX; - return bounds; - - case ZSTD_c_minMatch: - bounds.lowerBound = ZSTD_MINMATCH_MIN; - bounds.upperBound = ZSTD_MINMATCH_MAX; - return bounds; - - case ZSTD_c_targetLength: - bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; - bounds.upperBound = ZSTD_TARGETLENGTH_MAX; - return bounds; - - case ZSTD_c_strategy: - bounds.lowerBound = ZSTD_STRATEGY_MIN; - bounds.upperBound = ZSTD_STRATEGY_MAX; - return bounds; - - case ZSTD_c_contentSizeFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_checksumFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_dictIDFlag: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_nbWorkers: - bounds.lowerBound = 0; -#ifdef ZSTD_MULTITHREAD - bounds.upperBound = ZSTDMT_NBWORKERS_MAX; -#else - bounds.upperBound = 0; -#endif - return bounds; - - case ZSTD_c_jobSize: - bounds.lowerBound = 0; -#ifdef ZSTD_MULTITHREAD - bounds.upperBound = ZSTDMT_JOBSIZE_MAX; -#else - bounds.upperBound = 0; -#endif - return bounds; - - case ZSTD_c_overlapLog: -#ifdef ZSTD_MULTITHREAD - bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; - bounds.upperBound = ZSTD_OVERLAPLOG_MAX; -#else - bounds.lowerBound = 0; - bounds.upperBound = 0; -#endif - return bounds; - - case ZSTD_c_enableLongDistanceMatching: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_ldmHashLog: - bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; - bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; - return bounds; - - case ZSTD_c_ldmMinMatch: - bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; - bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; - return bounds; - - case ZSTD_c_ldmBucketSizeLog: - bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; - bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; - return bounds; - - case ZSTD_c_ldmHashRateLog: - bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; - bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; - return bounds; - - /* experimental parameters */ - case ZSTD_c_rsyncable: - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_forceMaxWindow : - bounds.lowerBound = 0; - bounds.upperBound = 1; - return bounds; - - case ZSTD_c_format: - ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); - bounds.lowerBound = ZSTD_f_zstd1; - bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ - return bounds; - - case ZSTD_c_forceAttachDict: - ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy); - bounds.lowerBound = ZSTD_dictDefaultAttach; - bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ - return bounds; - - case ZSTD_c_literalCompressionMode: - ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); - bounds.lowerBound = ZSTD_lcm_auto; - bounds.upperBound = ZSTD_lcm_uncompressed; - return bounds; - - case ZSTD_c_targetCBlockSize: - bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; - bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; - return bounds; - - case ZSTD_c_srcSizeHint: - bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; - bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; - return bounds; - - default: - bounds.error = ERROR(parameter_unsupported); - return bounds; - } -} - -/* ZSTD_cParam_clampBounds: - * Clamps the value into the bounded range. - */ -static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) -{ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return bounds.error; - if (*value < bounds.lowerBound) *value = bounds.lowerBound; - if (*value > bounds.upperBound) *value = bounds.upperBound; - return 0; -} - -#define BOUNDCHECK(cParam, val) { \ - RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ - parameter_outOfBound, "Param out of bounds"); \ -} - - -static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) -{ - switch(param) - { - case ZSTD_c_compressionLevel: - case ZSTD_c_hashLog: - case ZSTD_c_chainLog: - case ZSTD_c_searchLog: - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: - return 1; - - case ZSTD_c_format: - case ZSTD_c_windowLog: - case ZSTD_c_contentSizeFlag: - case ZSTD_c_checksumFlag: - case ZSTD_c_dictIDFlag: - case ZSTD_c_forceMaxWindow : - case ZSTD_c_nbWorkers: - case ZSTD_c_jobSize: - case ZSTD_c_overlapLog: - case ZSTD_c_rsyncable: - case ZSTD_c_enableLongDistanceMatching: - case ZSTD_c_ldmHashLog: - case ZSTD_c_ldmMinMatch: - case ZSTD_c_ldmBucketSizeLog: - case ZSTD_c_ldmHashRateLog: - case ZSTD_c_forceAttachDict: - case ZSTD_c_literalCompressionMode: - case ZSTD_c_targetCBlockSize: - case ZSTD_c_srcSizeHint: - default: - return 0; - } -} - -size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) -{ - DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); - if (cctx->streamStage != zcss_init) { - if (ZSTD_isUpdateAuthorized(param)) { - cctx->cParamsChanged = 1; - } else { - RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); - } } - - switch(param) - { - case ZSTD_c_nbWorkers: - RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, - "MT not compatible with static alloc"); - break; - - case ZSTD_c_compressionLevel: - case ZSTD_c_windowLog: - case ZSTD_c_hashLog: - case ZSTD_c_chainLog: - case ZSTD_c_searchLog: - case ZSTD_c_minMatch: - case ZSTD_c_targetLength: - case ZSTD_c_strategy: - case ZSTD_c_ldmHashRateLog: - case ZSTD_c_format: - case ZSTD_c_contentSizeFlag: - case ZSTD_c_checksumFlag: - case ZSTD_c_dictIDFlag: - case ZSTD_c_forceMaxWindow: - case ZSTD_c_forceAttachDict: - case ZSTD_c_literalCompressionMode: - case ZSTD_c_jobSize: - case ZSTD_c_overlapLog: - case ZSTD_c_rsyncable: - case ZSTD_c_enableLongDistanceMatching: - case ZSTD_c_ldmHashLog: - case ZSTD_c_ldmMinMatch: - case ZSTD_c_ldmBucketSizeLog: - case ZSTD_c_targetCBlockSize: - case ZSTD_c_srcSizeHint: - break; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); -} - -size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, - ZSTD_cParameter param, int value) -{ - DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); - switch(param) - { - case ZSTD_c_format : - BOUNDCHECK(ZSTD_c_format, value); - CCtxParams->format = (ZSTD_format_e)value; - return (size_t)CCtxParams->format; - - case ZSTD_c_compressionLevel : { - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - if (value) { /* 0 : does not change current level */ - CCtxParams->compressionLevel = value; - } - if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; - return 0; /* return type (size_t) cannot represent negative values */ - } - - case ZSTD_c_windowLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_windowLog, value); - CCtxParams->cParams.windowLog = (U32)value; - return CCtxParams->cParams.windowLog; - - case ZSTD_c_hashLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_hashLog, value); - CCtxParams->cParams.hashLog = (U32)value; - return CCtxParams->cParams.hashLog; - - case ZSTD_c_chainLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_chainLog, value); - CCtxParams->cParams.chainLog = (U32)value; - return CCtxParams->cParams.chainLog; - - case ZSTD_c_searchLog : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_searchLog, value); - CCtxParams->cParams.searchLog = (U32)value; - return (size_t)value; - - case ZSTD_c_minMatch : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_minMatch, value); - CCtxParams->cParams.minMatch = value; - return CCtxParams->cParams.minMatch; - - case ZSTD_c_targetLength : - BOUNDCHECK(ZSTD_c_targetLength, value); - CCtxParams->cParams.targetLength = value; - return CCtxParams->cParams.targetLength; - - case ZSTD_c_strategy : - if (value!=0) /* 0 => use default */ - BOUNDCHECK(ZSTD_c_strategy, value); - CCtxParams->cParams.strategy = (ZSTD_strategy)value; - return (size_t)CCtxParams->cParams.strategy; - - case ZSTD_c_contentSizeFlag : - /* Content size written in frame header _when known_ (default:1) */ - DEBUGLOG(4, "set content size flag = %u", (value!=0)); - CCtxParams->fParams.contentSizeFlag = value != 0; - return CCtxParams->fParams.contentSizeFlag; - - case ZSTD_c_checksumFlag : - /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ - CCtxParams->fParams.checksumFlag = value != 0; - return CCtxParams->fParams.checksumFlag; - - case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ - DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); - CCtxParams->fParams.noDictIDFlag = !value; - return !CCtxParams->fParams.noDictIDFlag; - - case ZSTD_c_forceMaxWindow : - CCtxParams->forceWindow = (value != 0); - return CCtxParams->forceWindow; - - case ZSTD_c_forceAttachDict : { - const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; - BOUNDCHECK(ZSTD_c_forceAttachDict, pref); - CCtxParams->attachDictPref = pref; - return CCtxParams->attachDictPref; - } - - case ZSTD_c_literalCompressionMode : { - const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; - BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); - CCtxParams->literalCompressionMode = lcm; - return CCtxParams->literalCompressionMode; - } - - case ZSTD_c_nbWorkers : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - CCtxParams->nbWorkers = value; - return CCtxParams->nbWorkers; -#endif - - case ZSTD_c_jobSize : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - /* Adjust to the minimum non-default value. */ - if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) - value = ZSTDMT_JOBSIZE_MIN; - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); - assert(value >= 0); - CCtxParams->jobSize = value; - return CCtxParams->jobSize; -#endif - - case ZSTD_c_overlapLog : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); - CCtxParams->overlapLog = value; - return CCtxParams->overlapLog; -#endif - - case ZSTD_c_rsyncable : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); - return 0; -#else - FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); - CCtxParams->rsyncable = value; - return CCtxParams->rsyncable; -#endif - - case ZSTD_c_enableLongDistanceMatching : - CCtxParams->ldmParams.enableLdm = (value!=0); - return CCtxParams->ldmParams.enableLdm; - - case ZSTD_c_ldmHashLog : - if (value!=0) /* 0 ==> auto */ - BOUNDCHECK(ZSTD_c_ldmHashLog, value); - CCtxParams->ldmParams.hashLog = value; - return CCtxParams->ldmParams.hashLog; - - case ZSTD_c_ldmMinMatch : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmMinMatch, value); - CCtxParams->ldmParams.minMatchLength = value; - return CCtxParams->ldmParams.minMatchLength; - - case ZSTD_c_ldmBucketSizeLog : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); - CCtxParams->ldmParams.bucketSizeLog = value; - return CCtxParams->ldmParams.bucketSizeLog; - - case ZSTD_c_ldmHashRateLog : - RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, - parameter_outOfBound, "Param out of bounds!"); - CCtxParams->ldmParams.hashRateLog = value; - return CCtxParams->ldmParams.hashRateLog; - - case ZSTD_c_targetCBlockSize : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_targetCBlockSize, value); - CCtxParams->targetCBlockSize = value; - return CCtxParams->targetCBlockSize; - - case ZSTD_c_srcSizeHint : - if (value!=0) /* 0 ==> default */ - BOUNDCHECK(ZSTD_c_srcSizeHint, value); - CCtxParams->srcSizeHint = value; - return CCtxParams->srcSizeHint; - - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } -} - -size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) -{ - return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); -} - -size_t ZSTD_CCtxParams_getParameter( - ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) -{ - switch(param) - { - case ZSTD_c_format : - *value = CCtxParams->format; - break; - case ZSTD_c_compressionLevel : - *value = CCtxParams->compressionLevel; - break; - case ZSTD_c_windowLog : - *value = (int)CCtxParams->cParams.windowLog; - break; - case ZSTD_c_hashLog : - *value = (int)CCtxParams->cParams.hashLog; - break; - case ZSTD_c_chainLog : - *value = (int)CCtxParams->cParams.chainLog; - break; - case ZSTD_c_searchLog : - *value = CCtxParams->cParams.searchLog; - break; - case ZSTD_c_minMatch : - *value = CCtxParams->cParams.minMatch; - break; - case ZSTD_c_targetLength : - *value = CCtxParams->cParams.targetLength; - break; - case ZSTD_c_strategy : - *value = (unsigned)CCtxParams->cParams.strategy; - break; - case ZSTD_c_contentSizeFlag : - *value = CCtxParams->fParams.contentSizeFlag; - break; - case ZSTD_c_checksumFlag : - *value = CCtxParams->fParams.checksumFlag; - break; - case ZSTD_c_dictIDFlag : - *value = !CCtxParams->fParams.noDictIDFlag; - break; - case ZSTD_c_forceMaxWindow : - *value = CCtxParams->forceWindow; - break; - case ZSTD_c_forceAttachDict : - *value = CCtxParams->attachDictPref; - break; - case ZSTD_c_literalCompressionMode : - *value = CCtxParams->literalCompressionMode; - break; - case ZSTD_c_nbWorkers : -#ifndef ZSTD_MULTITHREAD - assert(CCtxParams->nbWorkers == 0); -#endif - *value = CCtxParams->nbWorkers; - break; - case ZSTD_c_jobSize : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - assert(CCtxParams->jobSize <= INT_MAX); - *value = (int)CCtxParams->jobSize; - break; -#endif - case ZSTD_c_overlapLog : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - *value = CCtxParams->overlapLog; - break; -#endif - case ZSTD_c_rsyncable : -#ifndef ZSTD_MULTITHREAD - RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); -#else - *value = CCtxParams->rsyncable; - break; -#endif - case ZSTD_c_enableLongDistanceMatching : - *value = CCtxParams->ldmParams.enableLdm; - break; - case ZSTD_c_ldmHashLog : - *value = CCtxParams->ldmParams.hashLog; - break; - case ZSTD_c_ldmMinMatch : - *value = CCtxParams->ldmParams.minMatchLength; - break; - case ZSTD_c_ldmBucketSizeLog : - *value = CCtxParams->ldmParams.bucketSizeLog; - break; - case ZSTD_c_ldmHashRateLog : - *value = CCtxParams->ldmParams.hashRateLog; - break; - case ZSTD_c_targetCBlockSize : - *value = (int)CCtxParams->targetCBlockSize; - break; - case ZSTD_c_srcSizeHint : - *value = (int)CCtxParams->srcSizeHint; - break; - default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); - } - return 0; -} - -/** ZSTD_CCtx_setParametersUsingCCtxParams() : - * just applies `params` into `cctx` - * no action is performed, parameters are merely stored. - * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. - * This is possible even if a compression is ongoing. - * In which case, new parameters will be applied on the fly, starting with next compression job. - */ -size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) -{ - DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "The context is in the wrong stage!"); - RETURN_ERROR_IF(cctx->cdict, stage_wrong, - "Can't override parameters with cdict attached (some must " - "be inherited from the cdict)."); - - cctx->requestedParams = *params; - return 0; -} - -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't set pledgedSrcSize when not in init stage."); - cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; - return 0; -} - -/** - * Initializes the local dict using the requested parameters. - * NOTE: This does not use the pledged src size, because it may be used for more - * than one compression. - */ -static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) -{ - ZSTD_localDict* const dl = &cctx->localDict; - ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN, dl->dictSize); - if (dl->dict == NULL) { - /* No local dictionary. */ - assert(dl->dictBuffer == NULL); - assert(dl->cdict == NULL); - assert(dl->dictSize == 0); - return 0; - } - if (dl->cdict != NULL) { - assert(cctx->cdict == dl->cdict); - /* Local dictionary already initialized. */ - return 0; - } - assert(dl->dictSize > 0); - assert(cctx->cdict == NULL); - assert(cctx->prefixDict.dict == NULL); - - dl->cdict = ZSTD_createCDict_advanced( - dl->dict, - dl->dictSize, - ZSTD_dlm_byRef, - dl->dictContentType, - cParams, - cctx->customMem); - RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); - cctx->cdict = dl->cdict; - return 0; -} - -size_t ZSTD_CCtx_loadDictionary_advanced( - ZSTD_CCtx* cctx, const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't load a dictionary when ctx is not in init stage."); - RETURN_ERROR_IF(cctx->staticSize, memory_allocation, - "no malloc for static CCtx"); - DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); - ZSTD_clearAllDicts(cctx); /* in case one already exists */ - if (dict == NULL || dictSize == 0) /* no dictionary mode */ - return 0; - if (dictLoadMethod == ZSTD_dlm_byRef) { - cctx->localDict.dict = dict; - } else { - void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); - RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); - memcpy(dictBuffer, dict, dictSize); - cctx->localDict.dictBuffer = dictBuffer; - cctx->localDict.dict = dictBuffer; - } - cctx->localDict.dictSize = dictSize; - cctx->localDict.dictContentType = dictContentType; - return 0; -} - -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( - ZSTD_CCtx* cctx, const void* dict, size_t dictSize) -{ - return ZSTD_CCtx_loadDictionary_advanced( - cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); -} - -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) -{ - return ZSTD_CCtx_loadDictionary_advanced( - cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); -} - - -size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't ref a dict when ctx not in init stage."); - /* Free the existing local cdict (if any) to save memory. */ - ZSTD_clearAllDicts(cctx); - cctx->cdict = cdict; - return 0; -} - -size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) -{ - return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); -} - -size_t ZSTD_CCtx_refPrefix_advanced( - ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't ref a prefix when ctx not in init stage."); - ZSTD_clearAllDicts(cctx); - if (prefix != NULL && prefixSize > 0) { - cctx->prefixDict.dict = prefix; - cctx->prefixDict.dictSize = prefixSize; - cctx->prefixDict.dictContentType = dictContentType; - } - return 0; -} - -/*! ZSTD_CCtx_reset() : - * Also dumps dictionary */ -size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) -{ - if ( (reset == ZSTD_reset_session_only) - || (reset == ZSTD_reset_session_and_parameters) ) { - cctx->streamStage = zcss_init; - cctx->pledgedSrcSizePlusOne = 0; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, - "Can't reset parameters only when not in init stage."); - ZSTD_clearAllDicts(cctx); - return ZSTD_CCtxParams_reset(&cctx->requestedParams); - } - return 0; -} - - -/** ZSTD_checkCParams() : - control CParam values remain within authorized range. - @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) -{ - BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); - BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); - BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); - BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); - BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); - return 0; -} - -/** ZSTD_clampCParams() : - * make CParam values within valid range. - * @return : valid CParams */ -static ZSTD_compressionParameters -ZSTD_clampCParams(ZSTD_compressionParameters cParams) -{ -# define CLAMP_TYPE(cParam, val, type) { \ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ - if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ - } -# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) - CLAMP(ZSTD_c_windowLog, cParams.windowLog); - CLAMP(ZSTD_c_chainLog, cParams.chainLog); - CLAMP(ZSTD_c_hashLog, cParams.hashLog); - CLAMP(ZSTD_c_searchLog, cParams.searchLog); - CLAMP(ZSTD_c_minMatch, cParams.minMatch); - CLAMP(ZSTD_c_targetLength,cParams.targetLength); - CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); - return cParams; -} - -/** ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) -{ - U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; -} - -/** ZSTD_adjustCParams_internal() : - * optimize `cPar` for a specified input (`srcSize` and `dictSize`). - * mostly downsize to reduce memory consumption and initialization latency. - * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. - * note : `srcSize==0` means 0! - * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ -static ZSTD_compressionParameters -ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize) -{ - static const U64 minSrcSize = 513; /* (1<<9) + 1 */ - static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); - assert(ZSTD_checkCParams(cPar)==0); - - if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) - srcSize = minSrcSize; - - /* resize windowLog if input is small enough, to use less memory */ - if ( (srcSize < maxWindowResize) - && (dictSize < maxWindowResize) ) { - U32 const tSize = (U32)(srcSize + dictSize); - static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; - U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : - ZSTD_highbit32(tSize-1) + 1; - if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; - } - if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1; - { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) - cPar.chainLog -= (cycleLog - cPar.windowLog); - } - - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ - - return cPar; -} - -ZSTD_compressionParameters -ZSTD_adjustCParams(ZSTD_compressionParameters cPar, - unsigned long long srcSize, - size_t dictSize) -{ - cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ - if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); -} - -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize); - -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize) -{ - ZSTD_compressionParameters cParams; - if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { - srcSizeHint = CCtxParams->srcSizeHint; - } - cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize); - if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; - if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog; - if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog; - if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog; - if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog; - if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch; - if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength; - if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy; - assert(!ZSTD_checkCParams(cParams)); - /* srcSizeHint == 0 means 0 */ - return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize); -} - -static size_t -ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, - const U32 forCCtx) -{ - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); - size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; - /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't - * surrounded by redzones in ASAN. */ - size_t const tableSpace = chainSize * sizeof(U32) - + hSize * sizeof(U32) - + h3Size * sizeof(U32); - size_t const optPotentialSpace = - ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) - + ZSTD_cwksp_alloc_size((1<strategy >= ZSTD_btopt)) - ? optPotentialSpace - : 0; - DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", - (U32)chainSize, (U32)hSize, (U32)h3Size); - return tableSpace + optSpace; -} - -size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) -{ - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - U32 const divider = (cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1); - - size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq)); - - /* estimateCCtxSize is for one-shot compression. So no buffers should - * be needed. However, we still allocate two 0-sized buffers, which can - * take space under ASAN. */ - size_t const bufferSpace = ZSTD_cwksp_alloc_size(0) - + ZSTD_cwksp_alloc_size(0); - - size_t const cctxSpace = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)); - - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; - - DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); - return neededSpace; - } -} - -size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); -} - -static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - return ZSTD_estimateCCtxSize_usingCParams(cParams); -} - -size_t ZSTD_estimateCCtxSize(int compressionLevel) -{ - int level; - size_t memBudget = 0; - for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { - size_t const newMB = ZSTD_estimateCCtxSize_internal(level); - if (newMB > memBudget) memBudget = newMB; - } - return memBudget; -} - -size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) -{ - RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); - { ZSTD_compressionParameters const cParams = - ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0); - size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); - size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; - size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - size_t const streamingSize = ZSTD_cwksp_alloc_size(inBuffSize) - + ZSTD_cwksp_alloc_size(outBuffSize); - - return CCtxSize + streamingSize; - } -} - -size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) -{ - ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); - return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); -} - -static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0); - return ZSTD_estimateCStreamSize_usingCParams(cParams); -} - -size_t ZSTD_estimateCStreamSize(int compressionLevel) -{ - int level; - size_t memBudget = 0; - for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { - size_t const newMB = ZSTD_estimateCStreamSize_internal(level); - if (newMB > memBudget) memBudget = newMB; - } - return memBudget; -} - -/* ZSTD_getFrameProgression(): - * tells how much data has been consumed (input) and produced (output) for current frame. - * able to count progression inside worker threads (non-blocking mode). - */ -ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - return ZSTDMT_getFrameProgression(cctx->mtctx); - } -#endif - { ZSTD_frameProgression fp; - size_t const buffered = (cctx->inBuff == NULL) ? 0 : - cctx->inBuffPos - cctx->inToCompress; - if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); - assert(buffered <= ZSTD_BLOCKSIZE_MAX); - fp.ingested = cctx->consumedSrcSize + buffered; - fp.consumed = cctx->consumedSrcSize; - fp.produced = cctx->producedCSize; - fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ - fp.currentJobID = 0; - fp.nbActiveWorkers = 0; - return fp; -} } - -/*! ZSTD_toFlushNow() - * Only useful for multithreading scenarios currently (nbWorkers >= 1). - */ -size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - return ZSTDMT_toFlushNow(cctx->mtctx); - } -#endif - (void)cctx; - return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ -} - -static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, - ZSTD_compressionParameters cParams2) -{ - (void)cParams1; - (void)cParams2; - assert(cParams1.windowLog == cParams2.windowLog); - assert(cParams1.chainLog == cParams2.chainLog); - assert(cParams1.hashLog == cParams2.hashLog); - assert(cParams1.searchLog == cParams2.searchLog); - assert(cParams1.minMatch == cParams2.minMatch); - assert(cParams1.targetLength == cParams2.targetLength); - assert(cParams1.strategy == cParams2.strategy); -} - -void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) -{ - int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - bs->rep[i] = ZSTDInternalConstants::repStartValue[i]; - bs->entropy.huf.repeatMode = HUF_repeat_none; - bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; -} - -/*! ZSTD_invalidateMatchState() - * Invalidate all the matches in the match finder tables. - * Requires nextSrc and base to be set (can be NULL). - */ -static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) -{ - ZSTD_window_clear(&ms->window); - - ms->nextToUpdate = ms->window.dictLimit; - ms->loadedDictEnd = 0; - ms->opt.litLengthSum = 0; /* force reset of btopt stats */ - ms->dictMatchState = NULL; -} - -/** - * Indicates whether this compression proceeds directly from user-provided - * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or - * whether the context needs to buffer the input/output (ZSTDb_buffered). - */ -typedef enum { - ZSTDb_not_buffered, - ZSTDb_buffered -} ZSTD_buffered_policy_e; - -/** - * Controls, for this matchState reset, whether the tables need to be cleared / - * prepared for the coming compression (ZSTDcrp_makeClean), or whether the - * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a - * subsequent operation will overwrite the table space anyways (e.g., copying - * the matchState contents in from a CDict). - */ -typedef enum { - ZSTDcrp_makeClean, - ZSTDcrp_leaveDirty -} ZSTD_compResetPolicy_e; - -/** - * Controls, for this matchState reset, whether indexing can continue where it - * left off (ZSTDirp_continue), or whether it needs to be restarted from zero - * (ZSTDirp_reset). - */ -typedef enum { - ZSTDirp_continue, - ZSTDirp_reset -} ZSTD_indexResetPolicy_e; - -typedef enum { - ZSTD_resetTarget_CDict, - ZSTD_resetTarget_CCtx -} ZSTD_resetTarget_e; - -static size_t -ZSTD_reset_matchState(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - const ZSTD_compressionParameters* cParams, - const ZSTD_compResetPolicy_e crp, - const ZSTD_indexResetPolicy_e forceResetIndex, - const ZSTD_resetTarget_e forWho) -{ - size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); - size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; - size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; - - DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); - if (forceResetIndex == ZSTDirp_reset) { - ZSTD_window_init(&ms->window); - ZSTD_cwksp_mark_tables_dirty(ws); - } - - ms->hashLog3 = hashLog3; - - ZSTD_invalidateMatchState(ms); - - assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ - - ZSTD_cwksp_clear_tables(ws); - - DEBUGLOG(5, "reserving table space"); - /* table Space */ - ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); - ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); - ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, - "failed a workspace allocation in ZSTD_reset_matchState"); - - DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); - if (crp!=ZSTDcrp_leaveDirty) { - /* reset tables only */ - ZSTD_cwksp_clean_tables(ws); - } - - /* opt parser space */ - if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { - DEBUGLOG(4, "reserving optimal parser space"); - ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); - ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); - ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); - ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); - ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); - } - - ms->cParams = *cParams; - - RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, - "failed a workspace allocation in ZSTD_reset_matchState"); - - return 0; -} - -/* ZSTD_indexTooCloseToMax() : - * minor optimization : prefer memset() rather than reduceIndex() - * which is measurably slow in some circumstances (reported for Visual Studio). - * Works when re-using a context for a lot of smallish inputs : - * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, - * memset() will be triggered before reduceIndex(). - */ -#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) -static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) -{ - return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); -} - -/*! ZSTD_resetCCtx_internal() : - note : `params` are assumed fully validated at this stage */ -static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, - ZSTD_CCtx_params params, - U64 const pledgedSrcSize, - ZSTD_compResetPolicy_e const crp, - ZSTD_buffered_policy_e const zbuff) -{ - ZSTD_cwksp* const ws = &zc->workspace; - DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", - (U32)pledgedSrcSize, params.cParams.windowLog); - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - - zc->isFirstBlock = 1; - - if (params.ldmParams.enableLdm) { - /* Adjust long distance matching parameters */ - ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); - assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); - assert(params.ldmParams.hashRateLog < 32); - zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength); - } - - { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); - U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) - + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) - + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); - size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0; - size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0; - size_t const matchStateSize = ZSTD_sizeof_matchState(¶ms.cParams, /* forCCtx */ 1); - size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); - - ZSTD_indexResetPolicy_e needsIndexReset = zc->initialized ? ZSTDirp_continue : ZSTDirp_reset; - - if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { - needsIndexReset = ZSTDirp_reset; - } - - if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); - - /* Check if workspace is large enough, alloc a new one if needed */ - { size_t const cctxSpace = zc->staticSize ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; - size_t const entropySpace = ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE); - size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); - size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + ZSTD_cwksp_alloc_size(buffOutSize); - size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams); - size_t const ldmSeqSpace = ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)); - - size_t const neededSpace = - cctxSpace + - entropySpace + - blockStateSpace + - ldmSpace + - ldmSeqSpace + - matchStateSize + - tokenSpace + - bufferSpace; - - int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; - int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); - - DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers", - neededSpace>>10, matchStateSize>>10, bufferSpace>>10); - DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); - - if (workspaceTooSmall || workspaceWasteful) { - DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", - ZSTD_cwksp_sizeof(ws) >> 10, - neededSpace >> 10); - - RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); - - needsIndexReset = ZSTDirp_reset; - - ZSTD_cwksp_free(ws, zc->customMem); - FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); - - DEBUGLOG(5, "reserving object space"); - /* Statically sized space. - * entropyWorkspace never moves, - * though prev/next block swap places */ - assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); - zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); - zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); - zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, HUF_WORKSPACE_SIZE); - RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); - } } - - ZSTD_cwksp_clear(ws); - - /* init params */ - zc->appliedParams = params; - zc->blockState.matchState.cParams = params.cParams; - zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; - zc->consumedSrcSize = 0; - zc->producedCSize = 0; - if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) - zc->appliedParams.fParams.contentSizeFlag = 0; - DEBUGLOG(4, "pledged content size : %u ; flag : %u", - (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); - zc->blockSize = blockSize; - - XXH64_reset(&zc->xxhState, 0); - zc->stage = ZSTDcs_init; - zc->dictID = 0; - - ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - - /* ZSTD_wildcopy() is used to copy into the literals buffer, - * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. - */ - zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); - zc->seqStore.maxNbLit = blockSize; - - /* buffers */ - zc->inBuffSize = buffInSize; - zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); - zc->outBuffSize = buffOutSize; - zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); - - /* ldm bucketOffsets table */ - if (params.ldmParams.enableLdm) { - /* TODO: avoid memset? */ - size_t const ldmBucketSize = - ((size_t)1) << (params.ldmParams.hashLog - - params.ldmParams.bucketSizeLog); - zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, ldmBucketSize); - memset(zc->ldmState.bucketOffsets, 0, ldmBucketSize); - } - - /* sequences storage */ - ZSTD_referenceExternalSequences(zc, NULL, 0); - zc->seqStore.maxNbSeq = maxNbSeq; - zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); - zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); - - FORWARD_IF_ERROR(ZSTD_reset_matchState( - &zc->blockState.matchState, - ws, - ¶ms.cParams, - crp, - needsIndexReset, - ZSTD_resetTarget_CCtx), ""); - - /* ldm hash table */ - if (params.ldmParams.enableLdm) { - /* TODO: avoid memset? */ - size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; - zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); - memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); - zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); - zc->maxNbLdmSequences = maxNbLdmSeq; - - ZSTD_window_init(&zc->ldmState.window); - ZSTD_window_clear(&zc->ldmState.window); - zc->ldmState.loadedDictEnd = 0; - } - - DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); - zc->initialized = 1; - - return 0; - } -} - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { - int i; - for (i=0; iblockState.prevCBlock->rep[i] = 0; - assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); -} - -/* These are the approximate sizes for each strategy past which copying the - * dictionary tables into the working context is faster than using them - * in-place. - */ -static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { - 8 KB, /* unused */ - 8 KB, /* ZSTD_fast */ - 16 KB, /* ZSTD_dfast */ - 32 KB, /* ZSTD_greedy */ - 32 KB, /* ZSTD_lazy */ - 32 KB, /* ZSTD_lazy2 */ - 32 KB, /* ZSTD_btlazy2 */ - 32 KB, /* ZSTD_btopt */ - 8 KB, /* ZSTD_btultra */ - 8 KB /* ZSTD_btultra2 */ -}; - -static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - U64 pledgedSrcSize) -{ - size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; - return ( pledgedSrcSize <= cutoff - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || params->attachDictPref == ZSTD_dictForceAttach ) - && params->attachDictPref != ZSTD_dictForceCopy - && !params->forceWindow; /* dictMatchState isn't correctly - * handled in _enforceMaxDist */ -} - -static size_t -ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; - unsigned const windowLog = params.cParams.windowLog; - assert(windowLog != 0); - /* Resize working context table params for input only, since the dict - * has its own tables. */ - /* pledgeSrcSize == 0 means 0! */ - params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0); - params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); - } - - { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc - - cdict->matchState.window.base); - const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; - if (cdictLen == 0) { - /* don't even attach dictionaries with no contents */ - DEBUGLOG(4, "skipping attaching empty dictionary"); - } else { - DEBUGLOG(4, "attaching dictionary into context"); - cctx->blockState.matchState.dictMatchState = &cdict->matchState; - - /* prep working match state so dict matches never have negative indices - * when they are translated to the working context's index space. */ - if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { - cctx->blockState.matchState.window.nextSrc = - cctx->blockState.matchState.window.base + cdictEnd; - ZSTD_window_clear(&cctx->blockState.matchState.window); - } - /* loadedDictEnd is expressed within the referential of the active context */ - cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; - } } - - cctx->dictID = cdict->dictID; - - /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); - - return 0; -} - -static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; - - DEBUGLOG(4, "copying dictionary into context"); - - { unsigned const windowLog = params.cParams.windowLog; - assert(windowLog != 0); - /* Copy only compression parameters related to tables. */ - params.cParams = *cdict_cParams; - params.cParams.windowLog = windowLog; - FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff), ""); - assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); - assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); - assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); - } - - ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); - - /* copy tables */ - { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); - size_t const hSize = (size_t)1 << cdict_cParams->hashLog; - - memcpy(cctx->blockState.matchState.hashTable, - cdict->matchState.hashTable, - hSize * sizeof(U32)); - memcpy(cctx->blockState.matchState.chainTable, - cdict->matchState.chainTable, - chainSize * sizeof(U32)); - } - - /* Zero the hashTable3, since the cdict never fills it */ - { int const h3log = cctx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - assert(cdict->matchState.hashLog3 == 0); - memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); - } - - ZSTD_cwksp_mark_tables_clean(&cctx->workspace); - - /* copy dictionary offsets */ - { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; - ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } - - cctx->dictID = cdict->dictID; - - /* copy block state */ - memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); - - return 0; -} - -/* We have a choice between copying the dictionary context into the working - * context, or referencing the dictionary context from the working context - * in-place. We decide here which strategy to use. */ -static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - - DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", - (unsigned)pledgedSrcSize); - - if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { - return ZSTD_resetCCtx_byAttachingCDict( - cctx, cdict, *params, pledgedSrcSize, zbuff); - } else { - return ZSTD_resetCCtx_byCopyingCDict( - cctx, cdict, *params, pledgedSrcSize, zbuff); - } -} - -/*! ZSTD_copyCCtx_internal() : - * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). - * The "context", in this case, refers to the hash and chain tables, - * entropy tables, and dictionary references. - * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. - * @return : 0, or an error code */ -static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, - const ZSTD_CCtx* srcCCtx, - ZSTD_frameParameters fParams, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - DEBUGLOG(5, "ZSTD_copyCCtx_internal"); - RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, - "Can't copy a ctx that's not in init stage."); - - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); - { ZSTD_CCtx_params params = dstCCtx->requestedParams; - /* Copy only compression parameters related to tables. */ - params.cParams = srcCCtx->appliedParams.cParams; - params.fParams = fParams; - ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, - ZSTDcrp_leaveDirty, zbuff); - assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); - assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); - assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); - assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); - assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); - } - - ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); - - /* copy tables */ - { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); - size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; - int const h3log = srcCCtx->blockState.matchState.hashLog3; - size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; - - memcpy(dstCCtx->blockState.matchState.hashTable, - srcCCtx->blockState.matchState.hashTable, - hSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.chainTable, - srcCCtx->blockState.matchState.chainTable, - chainSize * sizeof(U32)); - memcpy(dstCCtx->blockState.matchState.hashTable3, - srcCCtx->blockState.matchState.hashTable3, - h3Size * sizeof(U32)); - } - - ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); - - /* copy dictionary offsets */ - { - const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; - ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; - dstMatchState->window = srcMatchState->window; - dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; - } - dstCCtx->dictID = srcCCtx->dictID; - - /* copy block state */ - memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); - - return 0; -} - -/*! ZSTD_copyCCtx() : - * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. - * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). - * pledgedSrcSize==0 means "unknown". -* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) -{ - ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0); - ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); - if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; - fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); - - return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, - fParams, pledgedSrcSize, - zbuff); -} - - -#define ZSTD_ROWSIZE 16 -/*! ZSTD_reduceTable() : - * reduce table indexes by `reducerValue`, or squash to zero. - * PreserveMark preserves "unsorted mark" for btlazy2 strategy. - * It must be set to a clear 0/1 value, to remove branch during inlining. - * Presume table size is a multiple of ZSTD_ROWSIZE - * to help auto-vectorization */ -FORCE_INLINE_TEMPLATE void -ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) -{ - int const nbRows = (int)size / ZSTD_ROWSIZE; - int cellNb = 0; - int rowNb; - assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ - assert(size < (1U<<31)); /* can be casted to int */ - -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the table re-use logic is sound, and that we don't - * access table space that we haven't cleaned, we re-"poison" the table - * space every time we mark it dirty. - * - * This function however is intended to operate on those dirty tables and - * re-clean them. So when this function is used correctly, we can unpoison - * the memory it operated on. This introduces a blind spot though, since - * if we now try to operate on __actually__ poisoned memory, we will not - * detect that. */ - __msan_unpoison(table, size * sizeof(U32)); -#endif - - for (rowNb=0 ; rowNb < nbRows ; rowNb++) { - int column; - for (column=0; columncParams.hashLog; - ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); - } - - if (params->cParams.strategy != ZSTD_fast) { - U32 const chainSize = (U32)1 << params->cParams.chainLog; - if (params->cParams.strategy == ZSTD_btlazy2) - ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); - else - ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); - } - - if (ms->hashLog3) { - U32 const h3Size = (U32)1 << ms->hashLog3; - ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); - } -} - - -/*-******************************************************* -* Block entropic compression -*********************************************************/ - -/* See doc/zstd_compression_format.md for detailed format description */ - -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) -{ - const seqDef* const sequences = seqStorePtr->sequencesStart; - BYTE* const llCodeTable = seqStorePtr->llCode; - BYTE* const ofCodeTable = seqStorePtr->ofCode; - BYTE* const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; - assert(nbSeq <= seqStorePtr->maxNbSeq); - for (u=0; ulongLengthID==1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID==2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -} - -/* ZSTD_useTargetCBlockSize(): - * Returns if target compressed block size param is being used. - * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. - * Returns 1 if true, 0 otherwise. */ -static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) -{ - DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); - return (cctxParams->targetCBlockSize != 0); -} - -/* ZSTD_compressSequences_internal(): - * actually compresses both literals and sequences */ -MEM_STATIC size_t -ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - void* entropyWorkspace, size_t entropyWkspSize, - const int bmi2) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - unsigned count[MaxSeq+1]; - FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef* const sequences = seqStorePtr->sequencesStart; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - BYTE* seqHead; - BYTE* lastNCount = NULL; - - DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; - size_t const litSize = (size_t)(seqStorePtr->lit - literals); - size_t const cSize = ZSTD_compressLiterals( - &prevEntropy->huf, &nextEntropy->huf, - cctxParams->cParams.strategy, - ZSTD_disableLiteralsCompression(cctxParams), - op, dstCapacity, - literals, litSize, - entropyWorkspace, entropyWkspSize, - bmi2); - FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); - assert(cSize <= dstCapacity); - op += cSize; - } - - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, "Can't fit seq hdr in output buf!"); - if (nbSeq < 128) { - *op++ = (BYTE)nbSeq; - } else if (nbSeq < LONGNBSEQ) { - op[0] = (BYTE)((nbSeq>>8) + 0x80); - op[1] = (BYTE)nbSeq; - op+=2; - } else { - op[0]=0xFF; - MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); - op+=3; - } - assert(op <= oend); - if (nbSeq==0) { - /* Copy the old tables over as if we repeated them */ - memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return (size_t)(op - ostart); - } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - assert(op <= oend); - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, - count, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->fse.litlengthCTable, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - count, max, llCodeTable, nbSeq, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, - prevEntropy->fse.litlengthCTable, - sizeof(prevEntropy->fse.litlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for Offsets */ - { unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, - count, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->fse.offcodeCTable, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - count, max, ofCodeTable, nbSeq, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, - prevEntropy->fse.offcodeCTable, - sizeof(prevEntropy->fse.offcodeCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - /* build CTable for MatchLengths */ - { unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp( - count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, - count, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->fse.matchlengthCTable, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable( - op, (size_t)(oend - op), - CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - count, max, mlCodeTable, nbSeq, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, - prevEntropy->fse.matchlengthCTable, - sizeof(prevEntropy->fse.matchlengthCTable), - entropyWorkspace, entropyWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - lastNCount = op; - op += countSize; - assert(op <= oend); - } } - - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - - { size_t const bitstreamSize = ZSTD_encodeSequences( - op, (size_t)(oend - op), - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, - longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); - op += bitstreamSize; - assert(op <= oend); - /* zstd versions <= 1.3.4 mistakenly report corruption when - * FSE_readNCount() receives a buffer < 4 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1146. - * This can happen when the last set_compressed table present is 2 - * bytes and the bitstream is only one byte. - * In this exceedingly rare case, we will simply emit an uncompressed - * block, since it isn't worth optimizing. - */ - if (lastNCount && (op - lastNCount) < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(op - lastNCount == 3); - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " - "emitting an uncompressed block."); - return 0; - } - } - - DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); - return (size_t)(op - ostart); -} - -MEM_STATIC size_t -ZSTD_compressSequences(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - size_t srcSize, - void* entropyWorkspace, size_t entropyWkspSize, - int bmi2) -{ - size_t const cSize = ZSTD_compressSequences_internal( - seqStorePtr, prevEntropy, nextEntropy, cctxParams, - dst, dstCapacity, - entropyWorkspace, entropyWkspSize, bmi2); - if (cSize == 0) return 0; - /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. - * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. - */ - if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) - return 0; /* block not compressed */ - FORWARD_IF_ERROR(cSize, "ZSTD_compressSequences_internal failed"); - - /* Check compressibility */ - { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); - if (cSize >= maxCSize) return 0; /* block not compressed */ - } - - return cSize; -} - -/* ZSTD_selectBlockCompressor() : - * Not static, but internal use only (used by long distance matcher) - * assumption : strat is a valid strategy */ -ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) -{ - static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = { - { ZSTD_compressBlock_fast /* default for 0 */, - ZSTD_compressBlock_fast, - ZSTD_compressBlock_doubleFast, - ZSTD_compressBlock_greedy, - ZSTD_compressBlock_lazy, - ZSTD_compressBlock_lazy2, - ZSTD_compressBlock_btlazy2, - ZSTD_compressBlock_btopt, - ZSTD_compressBlock_btultra, - ZSTD_compressBlock_btultra2 }, - { ZSTD_compressBlock_fast_extDict /* default for 0 */, - ZSTD_compressBlock_fast_extDict, - ZSTD_compressBlock_doubleFast_extDict, - ZSTD_compressBlock_greedy_extDict, - ZSTD_compressBlock_lazy_extDict, - ZSTD_compressBlock_lazy2_extDict, - ZSTD_compressBlock_btlazy2_extDict, - ZSTD_compressBlock_btopt_extDict, - ZSTD_compressBlock_btultra_extDict, - ZSTD_compressBlock_btultra_extDict }, - { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, - ZSTD_compressBlock_fast_dictMatchState, - ZSTD_compressBlock_doubleFast_dictMatchState, - ZSTD_compressBlock_greedy_dictMatchState, - ZSTD_compressBlock_lazy_dictMatchState, - ZSTD_compressBlock_lazy2_dictMatchState, - ZSTD_compressBlock_btlazy2_dictMatchState, - ZSTD_compressBlock_btopt_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState, - ZSTD_compressBlock_btultra_dictMatchState } - }; - ZSTD_blockCompressor selectedCompressor; - ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); - - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; - assert(selectedCompressor != NULL); - return selectedCompressor; -} - -static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, - const BYTE* anchor, size_t lastLLSize) -{ - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; -} - -void ZSTD_resetSeqStore(seqStore_t* ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} - -typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; - -static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) -{ - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ - ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTDInternalConstants::ZSTD_blockHeaderSize+1) { - ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); - return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ - } - ZSTD_resetSeqStore(&(zc->seqStore)); - /* required for optimal parser to read stats from dictionary */ - ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; - /* tell the optimal parser how we expect to compress literals */ - ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; - /* a gap between an attached dict and the current window is not safe, - * they must remain adjacent, - * and when that stops being the case, the dict must be unset */ - assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); - - /* limited update after a very long match */ - { const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const U32 current = (U32)(istart-base); - if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ - if (current > ms->nextToUpdate + 384) - ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384)); - } - - /* select and store sequences */ - { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); - size_t lastLLSize; - { int i; - for (i = 0; i < ZSTD_REP_NUM; ++i) - zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; - } - if (zc->externSeqStore.pos < zc->externSeqStore.size) { - assert(!zc->appliedParams.ldmParams.enableLdm); - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&zc->externSeqStore, - ms, &zc->seqStore, - zc->blockState.nextCBlock->rep, - src, srcSize); - assert(zc->externSeqStore.pos <= zc->externSeqStore.size); - } else if (zc->appliedParams.ldmParams.enableLdm) { - rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; - - ldmSeqStore.seq = zc->ldmSequences; - ldmSeqStore.capacity = zc->maxNbLdmSequences; - /* Updates ldmSeqStore.size */ - FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, - &zc->appliedParams.ldmParams, - src, srcSize), ""); - /* Updates ldmSeqStore.pos */ - lastLLSize = - ZSTD_ldm_blockCompress(&ldmSeqStore, - ms, &zc->seqStore, - zc->blockState.nextCBlock->rep, - src, srcSize); - assert(ldmSeqStore.pos == ldmSeqStore.size); - } else { /* not long range mode */ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); - lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); - } - { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; - ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); - } } - return ZSTDbss_compress; -} - -static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) -{ - const seqStore_t* seqStore = ZSTD_getSeqStore(zc); - const seqDef* seqs = seqStore->sequencesStart; - size_t seqsSize = seqStore->sequences - seqs; - - ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; - size_t i; size_t position; int repIdx; - - assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); - for (i = 0, position = 0; i < seqsSize; ++i) { - outSeqs[i].offset = seqs[i].offset; - outSeqs[i].litLength = seqs[i].litLength; - outSeqs[i].matchLength = seqs[i].matchLength + MINMATCH; - - if (i == seqStore->longLengthPos) { - if (seqStore->longLengthID == 1) { - outSeqs[i].litLength += 0x10000; - } else if (seqStore->longLengthID == 2) { - outSeqs[i].matchLength += 0x10000; - } - } - - if (outSeqs[i].offset <= ZSTD_REP_NUM) { - outSeqs[i].rep = outSeqs[i].offset; - repIdx = (unsigned int)i - outSeqs[i].offset; - - if (outSeqs[i].litLength == 0) { - if (outSeqs[i].offset < 3) { - --repIdx; - } else { - repIdx = (unsigned int)i - 1; - } - ++outSeqs[i].rep; - } - assert(repIdx >= -3); - outSeqs[i].offset = repIdx >= 0 ? outSeqs[repIdx].offset : ZSTDInternalConstants::repStartValue[-repIdx - 1]; - if (outSeqs[i].rep == 4) { - --outSeqs[i].offset; - } - } else { - outSeqs[i].offset -= ZSTD_REP_NUM; - } - - position += outSeqs[i].litLength; - outSeqs[i].matchPos = (unsigned int)position; - position += outSeqs[i].matchLength; - } - zc->seqCollector.seqIndex += seqsSize; -} - -size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize) -{ - const size_t dstCapacity = ZSTD_compressBound(srcSize); - void* dst = ZSTD_malloc(dstCapacity, ZSTDInternalConstants::ZSTD_defaultCMem); - SeqCollector seqCollector; - - RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); - - seqCollector.collectSequences = 1; - seqCollector.seqStart = outSeqs; - seqCollector.seqIndex = 0; - seqCollector.maxSequences = outSeqsSize; - zc->seqCollector = seqCollector; - - ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); - ZSTD_free(dst, ZSTDInternalConstants::ZSTD_defaultCMem); - return zc->seqCollector.seqIndex; -} - -/* Returns true if the given block is a RLE block */ -static int ZSTD_isRLE(const BYTE *ip, size_t length) { - size_t i; - if (length < 2) return 1; - for (i = 1; i < length; ++i) { - if (ip[0] != ip[i]) return 0; - } - return 1; -} - -/* Returns true if the given block may be RLE. - * This is just a heuristic based on the compressibility. - * It may return both false positives and false negatives. - */ -static int ZSTD_maybeRLE(seqStore_t const* seqStore) -{ - size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); - size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); - - return nbSeqs < 4 && nbLits < 10; -} - -static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) -{ - ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; - zc->blockState.prevCBlock = zc->blockState.nextCBlock; - zc->blockState.nextCBlock = tmp; -} - -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, U32 frame) -{ - /* This the upper bound for the length of an rle block. - * This isn't the actual upper bound. Finding the real threshold - * needs further investigation. - */ - const U32 rleMaxLength = 25; - size_t cSize; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, - (unsigned)zc->blockState.matchState.nextToUpdate); - - { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } - } - - if (zc->seqCollector.collectSequences) { - ZSTD_copyBlockSequences(zc); - return 0; - } - - /* encode sequences and literals */ - cSize = ZSTD_compressSequences(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - dst, dstCapacity, - srcSize, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */, - zc->bmi2); - - if (frame && - /* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - !zc->isFirstBlock && - cSize < rleMaxLength && - ZSTD_isRLE(ip, srcSize)) - { - cSize = 1; - op[0] = ip[0]; - } - -out: - if (!ZSTD_isError(cSize) && cSize > 1) { - ZSTD_confirmRepcodesAndEntropyTables(zc); - } - /* We check that dictionaries have offset codes available for the first - * block. After the first block, the offcode table might not have large - * enough codes to represent the offsets in the data. - */ - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - - return cSize; -} - -static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const size_t bss, U32 lastBlock) -{ - DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); - if (bss == ZSTDbss_compress) { - if (/* We don't want to emit our first block as a RLE even if it qualifies because - * doing so will cause the decoder (cli only) to throw a "should consume all input error." - * This is only an issue for zstd <= v1.4.3 - */ - !zc->isFirstBlock && - ZSTD_maybeRLE(&zc->seqStore) && - ZSTD_isRLE((BYTE const*)src, srcSize)) - { - return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); - } - /* Attempt superblock compression. - * - * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the - * standard ZSTD_compressBound(). This is a problem, because even if we have - * space now, taking an extra byte now could cause us to run out of space later - * and violate ZSTD_compressBound(). - * - * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. - * - * In order to respect ZSTD_compressBound() we must attempt to emit a raw - * uncompressed block in these cases: - * * cSize == 0: Return code for an uncompressed block. - * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). - * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of - * output space. - * * cSize >= blockBound(srcSize): We have expanded the block too much so - * emit an uncompressed block. - */ - { - size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); - if (cSize != ERROR(dstSize_tooSmall)) { - size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); - if (cSize != 0 && cSize < maxCSize + ZSTDInternalConstants::ZSTD_blockHeaderSize) { - ZSTD_confirmRepcodesAndEntropyTables(zc); - return cSize; - } - } - } - } - - DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); - /* Superblock compression failed, attempt to emit a single no compress block. - * The decoder will be able to stream this block since it is uncompressed. - */ - return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); -} - -static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastBlock) -{ - size_t cSize = 0; - const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); - DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", - (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); - FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); - - cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); - - if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) - zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; - - return cSize; -} - -static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - void const* ip, - void const* iend) -{ - if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { - U32 const maxDist = (U32)1 << params->cParams.windowLog; - U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_cwksp_mark_tables_dirty(ws); - ZSTD_reduceIndex(ms, params, correction); - ZSTD_cwksp_mark_tables_clean(ws); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - /* invalidate dictionaries on overflow correction */ - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } -} - -/*! ZSTD_compress_frameChunk() : -* Compress a chunk of data into one or multiple blocks. -* All blocks will be terminated, all input will be consumed. -* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. -* Frame is supposed already started (header already produced) -* @return : compressed size, or an error code -*/ -static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 lastFrameChunk) -{ - size_t blockSize = cctx->blockSize; - size_t remaining = srcSize; - const BYTE* ip = (const BYTE*)src; - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; - - assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); - - DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); - if (cctx->appliedParams.fParams.checksumFlag && srcSize) - XXH64_update(&cctx->xxhState, src, srcSize); - - while (remaining) { - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - - RETURN_ERROR_IF(dstCapacity < ZSTDInternalConstants::ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, - dstSize_tooSmall, - "not enough space to store compressed block"); - if (remaining < blockSize) blockSize = remaining; - - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); - ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); - - /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ - if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; - - { size_t cSize; - if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { - cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); - assert(cSize > 0); - assert(cSize <= blockSize + ZSTDInternalConstants::ZSTD_blockHeaderSize); - } else { - cSize = ZSTD_compressBlock_internal(cctx, - op+ZSTDInternalConstants::ZSTD_blockHeaderSize, dstCapacity-ZSTDInternalConstants::ZSTD_blockHeaderSize, - ip, blockSize, 1 /* frame */); - FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); - - if (cSize == 0) { /* block is not compressible */ - cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - } else { - U32 const cBlockHeader = cSize == 1 ? - lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : - lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(op, cBlockHeader); - cSize += ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - } - - - ip += blockSize; - assert(remaining >= blockSize); - remaining -= blockSize; - op += cSize; - assert(dstCapacity >= cSize); - dstCapacity -= cSize; - cctx->isFirstBlock = 0; - DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", - (unsigned)cSize); - } } - - if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; - return (size_t)(op-ostart); -} - - -static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) -{ BYTE* const op = (BYTE*)dst; - U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ - U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ - U32 const checksumFlag = params->fParams.checksumFlag>0; - U32 const windowSize = (U32)1 << params->cParams.windowLog; - U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = params->fParams.contentSizeFlag ? - (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ - BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); - size_t pos=0; - - assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); - RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, - "dst buf is too small to fit worst-case frame header size."); - DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", - !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); - - if (params->format == ZSTD_f_zstd1) { - MEM_writeLE32(dst, ZSTD_MAGICNUMBER); - pos = 4; - } - op[pos++] = frameHeaderDescriptionByte; - if (!singleSegment) op[pos++] = windowLogByte; - switch(dictIDSizeCode) - { - default: assert(0); /* impossible */ - case 0 : break; - case 1 : op[pos] = (BYTE)(dictID); pos++; break; - case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; - case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; - } - switch(fcsCode) - { - default: assert(0); /* impossible */ - case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; - case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; - case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; - case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; - } - return pos; -} - -/* ZSTD_writeLastEmptyBlock() : - * output an empty Block with end-of-frame mark to complete a frame - * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) - * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong, - "wrong cctx stage"); - RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, - parameter_unsupported, - "incompatible with ldm"); - cctx->externSeqStore.seq = seq; - cctx->externSeqStore.size = nbSeq; - cctx->externSeqStore.capacity = nbSeq; - cctx->externSeqStore.pos = 0; - return 0; -} - - -static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - U32 frame, U32 lastFrameChunk) -{ - ZSTD_matchState_t* const ms = &cctx->blockState.matchState; - size_t fhSize = 0; - - DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", - cctx->stage, (unsigned)srcSize); - RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, - "missing init (ZSTD_compressBegin)"); - - if (frame && (cctx->stage==ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, - cctx->pledgedSrcSizePlusOne-1, cctx->dictID); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - assert(fhSize <= dstCapacity); - dstCapacity -= fhSize; - dst = (char*)dst + fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - if (!srcSize) return fhSize; /* do not generate an empty block if no input */ - - if (!ZSTD_window_update(&ms->window, src, srcSize)) { - ms->nextToUpdate = ms->window.dictLimit; - } - if (cctx->appliedParams.ldmParams.enableLdm) { - ZSTD_window_update(&cctx->ldmState.window, src, srcSize); - } - - if (!frame) { - /* overflow check and correction for block mode */ - ZSTD_overflowCorrectIfNeeded( - ms, &cctx->workspace, &cctx->appliedParams, - src, (BYTE const*)src + srcSize); - } - - DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); - { size_t const cSize = frame ? - ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : - ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); - FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); - cctx->consumedSrcSize += srcSize; - cctx->producedCSize += (cSize + fhSize); - assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); - if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - RETURN_ERROR_IF( - cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, - srcSize_wrong, - "error : pledgedSrcSize = %u, while realSrcSize >= %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, - (unsigned)cctx->consumedSrcSize); - } - return cSize + fhSize; - } -} - -size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); -} - - -size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) -{ - ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; - assert(!ZSTD_checkCParams(cParams)); - return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); -} - -size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); - { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } - - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); -} - -/*! ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ -static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* src, size_t srcSize, - ZSTD_dictTableLoadMethod_e dtlm) -{ - const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; - - ZSTD_window_update(&ms->window, src, srcSize); - ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); - - if (params->ldmParams.enableLdm && ls != NULL) { - ZSTD_window_update(&ls->window, src, srcSize); - ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); - } - - /* Assert that we the ms params match the params we're being given */ - ZSTD_assertEqualCParams(params->cParams, ms->cParams); - - if (srcSize <= HASH_READ_SIZE) return 0; - - while (iend - ip > HASH_READ_SIZE) { - size_t const remaining = (size_t)(iend - ip); - size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); - const BYTE* const ichunk = ip + chunk; - - ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); - - if (params->ldmParams.enableLdm && ls != NULL) - ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); - - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, ichunk, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (chunk >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); - break; - - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (chunk >= HASH_READ_SIZE) - ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); - break; - - default: - assert(0); /* not possible : not a valid strategy id */ - } - - ip = ichunk; - } - - ms->nextToUpdate = (U32)(iend - ms->window.base); - return 0; -} - - -/* Dictionaries that assign zero probability to symbols that show up causes problems - when FSE encoding. Refuse dictionaries that assign zero probability to symbols - that we may encounter during compression. - NOTE: This behavior is not standard and could be improved in the future. */ -static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { - U32 s; - RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted, "dict fse tables don't have all symbols"); - for (s = 0; s <= maxSymbolValue; ++s) { - RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted, "dict fse tables don't have all symbols"); - } - return 0; -} - -size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize) -{ - const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ - const BYTE* const dictEnd = dictPtr + dictSize; - dictPtr += 8; - bs->entropy.huf.repeatMode = HUF_repeat_check; - - { unsigned maxSymbolValue = 255; - unsigned hasZeroWeights = 1; - size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, - dictEnd-dictPtr, &hasZeroWeights); - - /* We only set the loaded table as valid if it contains all non-zero - * weights. Otherwise, we set it to check */ - if (!hasZeroWeights) - bs->entropy.huf.repeatMode = HUF_repeat_valid; - - RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); - dictPtr += hufHeaderSize; - } - - { unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - /* fill all offset symbols to avoid garbage at end of table */ - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.offcodeCTable, - offcodeNCount, MaxOff, offcodeLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += offcodeHeaderSize; - } - - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - /* Every match length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML), ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.matchlengthCTable, - matchlengthNCount, matchlengthMaxValue, matchlengthLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += matchlengthHeaderSize; - } - - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - /* Every literal length code must have non-zero probability */ - FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL), ""); - RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( - bs->entropy.fse.litlengthCTable, - litlengthNCount, litlengthMaxValue, litlengthLog, - workspace, HUF_WORKSPACE_SIZE)), - dictionary_corrupted, ""); - dictPtr += litlengthHeaderSize; - } - - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); - bs->rep[0] = MEM_readLE32(dictPtr+0); - bs->rep[1] = MEM_readLE32(dictPtr+4); - bs->rep[2] = MEM_readLE32(dictPtr+8); - dictPtr += 12; - - return dictPtr - (const BYTE*)dict; -} - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : dictID, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed >= 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ZSTD_cwksp* ws, - ZSTD_CCtx_params const* params, - const void* dict, size_t dictSize, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) -{ - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff; - size_t dictID; - size_t eSize; - - ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); - assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); - - dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); - eSize = ZSTD_loadCEntropy(bs, workspace, offcodeNCount, &offcodeMaxValue, dict, dictSize); - FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); - dictPtr += eSize; - - { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - U32 offcodeMax = MaxOff; - if (dictContentSize <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ - offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ - } - /* All offset values <= dictContentSize + 128 KB must be representable */ - FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)), ""); - /* All repCodes must be <= dictContentSize and != 0*/ - { U32 u; - for (u=0; u<3; u++) { - RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); - RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); - } } - - bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; - bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; - bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; - FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( - ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); - return dictID; - } -} - -/** ZSTD_compress_insertDictionary() : -* @return : dictID, or an error code */ -static size_t -ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, - ZSTD_matchState_t* ms, - ldmState_t* ls, - ZSTD_cwksp* ws, - const ZSTD_CCtx_params* params, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - void* workspace) -{ - DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); - if ((dict==NULL) || (dictSize<8)) { - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); - return 0; - } - - ZSTD_reset_compressedBlockState(bs); - - /* dict restricted modes */ - if (dictContentType == ZSTD_dct_rawContent) - return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); - - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_auto) { - DEBUGLOG(4, "raw content dictionary detected"); - return ZSTD_loadDictionaryContent( - ms, ls, ws, params, dict, dictSize, dtlm); - } - RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); - assert(0); /* impossible */ - } - - /* dict as full zstd dictionary */ - return ZSTD_loadZstdDictionary( - bs, ms, ws, params, dict, dictSize, dtlm, workspace); -} - -#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) -#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6) - -/*! ZSTD_compressBegin_internal() : - * @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) -{ - DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - if ( (cdict) - && (cdict->dictContentSize > 0) - && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0) - && (params->attachDictPref != ZSTD_dictForceLoad) ) { - return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); - } - - FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, - ZSTDcrp_makeClean, zbuff) , ""); - { size_t const dictID = cdict ? - ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, - cdict->dictContentSize, dictContentType, dtlm, - cctx->entropyWorkspace) - : ZSTD_compress_insertDictionary( - cctx->blockState.prevCBlock, &cctx->blockState.matchState, - &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, - dictContentType, dtlm, cctx->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= UINT_MAX); - cctx->dictID = (U32)dictID; - } - return 0; -} - -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); - /* compression parameters verification and optimization */ - FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); - return ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, dtlm, - cdict, - params, pledgedSrcSize, - ZSTDb_not_buffered); -} - -/*! ZSTD_compressBegin_advanced() : -* @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - return ZSTD_compressBegin_advanced_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, - NULL /*cdict*/, - &cctxParams, pledgedSrcSize); -} - -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); -} - -size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) -{ - return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); -} - - -/*! ZSTD_writeEpilogue() : -* Ends a frame. -* @return : nb of bytes written into dst (or an error code) */ -static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* op = ostart; - size_t fhSize = 0; - - DEBUGLOG(4, "ZSTD_writeEpilogue"); - RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); - - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); - FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); - dstCapacity -= fhSize; - op += fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); - MEM_writeLE32(op, cBlockHeader24); - op += ZSTDInternalConstants::ZSTD_blockHeaderSize; - dstCapacity -= ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - - if (cctx->appliedParams.fParams.checksumFlag) { - U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); - RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); - DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); - MEM_writeLE32(op, checksum); - op += 4; - } - - cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ - return op-ostart; -} - -size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, - dst, dstCapacity, src, srcSize, - 1 /* frame mode */, 1 /* last chunk */); - FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); - endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); - FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); - assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); - if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); - DEBUGLOG(4, "end of frame : controlling src size"); - RETURN_ERROR_IF( - cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, - srcSize_wrong, - "error : pledgedSrcSize = %u, while realSrcSize = %u", - (unsigned)cctx->pledgedSrcSizePlusOne-1, - (unsigned)cctx->consumedSrcSize); - } - return cSize + endResult; -} - - -static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_parameters* params) -{ - ZSTD_CCtx_params const cctxParams = - ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, params); - DEBUGLOG(4, "ZSTD_compress_internal"); - return ZSTD_compress_advanced_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - &cctxParams); -} - -size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params) -{ - DEBUGLOG(4, "ZSTD_compress_advanced"); - FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); - return ZSTD_compress_internal(cctx, - dst, dstCapacity, - src, srcSize, - dict, dictSize, - ¶ms); -} - -/* Internal */ -size_t ZSTD_compress_advanced_internal( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_CCtx_params* params) -{ - DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, - params, srcSize, ZSTDb_not_buffered) , ""); - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0); - ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(&cctx->requestedParams, ¶ms); - DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); - assert(params.fParams.contentSizeFlag == 1); - return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); -} - -size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); - assert(cctx != NULL); - return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); -} - -size_t ZSTD_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel) -{ - size_t result; - ZSTD_CCtx ctxBody; - ZSTD_initCCtx(&ctxBody, ZSTDInternalConstants::ZSTD_defaultCMem); - result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); - ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ - return result; -} - - -/* ===== Dictionary API ===== */ - -/*! ZSTD_estimateCDictSize_advanced() : - * Estimate amount of memory that will be needed to create a dictionary with following arguments */ -size_t ZSTD_estimateCDictSize_advanced( - size_t dictSize, ZSTD_compressionParameters cParams, - ZSTD_dictLoadMethod_e dictLoadMethod) -{ - DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); - return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) - + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); -} - -size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); -} - -size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) -{ - if (cdict==NULL) return 0; /* support sizeof on NULL */ - DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); - /* cdict may be in the workspace */ - return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) - + ZSTD_cwksp_sizeof(&cdict->workspace); -} - -static size_t ZSTD_initCDict_internal( - ZSTD_CDict* cdict, - const void* dictBuffer, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) -{ - DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); - assert(!ZSTD_checkCParams(cParams)); - cdict->matchState.cParams = cParams; - if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { - cdict->dictContent = dictBuffer; - } else { - void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); - RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); - cdict->dictContent = internalBuffer; - memcpy(internalBuffer, dictBuffer, dictSize); - } - cdict->dictContentSize = dictSize; - - cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); - - - /* Reset the state to no dictionary */ - ZSTD_reset_compressedBlockState(&cdict->cBlockState); - FORWARD_IF_ERROR(ZSTD_reset_matchState( - &cdict->matchState, - &cdict->workspace, - &cParams, - ZSTDcrp_makeClean, - ZSTDirp_reset, - ZSTD_resetTarget_CDict), ""); - /* (Maybe) load the dictionary - * Skips loading the dictionary if it is < 8 bytes. - */ - { ZSTD_CCtx_params params; - memset(¶ms, 0, sizeof(params)); - params.compressionLevel = ZSTD_CLEVEL_DEFAULT; - params.fParams.contentSizeFlag = 1; - params.cParams = cParams; - { size_t const dictID = ZSTD_compress_insertDictionary( - &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, - ¶ms, cdict->dictContent, cdict->dictContentSize, - dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); - FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); - assert(dictID <= (size_t)(U32)-1); - cdict->dictID = (U32)dictID; - } - } - - return 0; -} - -ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, ZSTD_customMem customMem) -{ - DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType); - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - { size_t const workspaceSize = - ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + - ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + - ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + - (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); - void* const workspace = ZSTD_malloc(workspaceSize, customMem); - ZSTD_cwksp ws; - ZSTD_CDict* cdict; - - if (!workspace) { - ZSTD_free(workspace, customMem); - return NULL; - } - - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - - cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); - assert(cdict != NULL); - ZSTD_cwksp_move(&cdict->workspace, &ws); - cdict->customMem = customMem; - cdict->compressionLevel = 0; /* signals advanced API usage */ - - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dictBuffer, dictSize, - dictLoadMethod, dictContentType, - cParams) )) { - ZSTD_freeCDict(cdict); - return NULL; - } - - return cdict; - } -} - -ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - ZSTD_CDict* cdict = ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byCopy, ZSTD_dct_auto, - cParams, ZSTDInternalConstants::ZSTD_defaultCMem); - if (cdict) - cdict->compressionLevel = compressionLevel == 0 ? ZSTD_CLEVEL_DEFAULT : compressionLevel; - return cdict; -} - -ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) -{ - ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize); - return ZSTD_createCDict_advanced(dict, dictSize, - ZSTD_dlm_byRef, ZSTD_dct_auto, - cParams, ZSTDInternalConstants::ZSTD_defaultCMem); -} - -size_t ZSTD_freeCDict(ZSTD_CDict* cdict) -{ - if (cdict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = cdict->customMem; - int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); - ZSTD_cwksp_free(&cdict->workspace, cMem); - if (!cdictInWorkspace) { - ZSTD_free(cdict, cMem); - } - return 0; - } -} - -/*! ZSTD_initStaticCDict_advanced() : - * Generate a digested dictionary in provided memory area. - * workspace: The memory area to emplace the dictionary into. - * Provided pointer must 8-bytes aligned. - * It must outlive dictionary usage. - * workspaceSize: Use ZSTD_estimateCDictSize() - * to determine how large workspace must be. - * cParams : use ZSTD_getCParams() to transform a compression level - * into its relevants cParams. - * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) - * Note : there is no corresponding "free" function. - * Since workspace was allocated externally, it must be freed externally. - */ -const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams) -{ - size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); - size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 - : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) - + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) - + matchStateSize; - ZSTD_CDict* cdict; - - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - - { - ZSTD_cwksp ws; - ZSTD_cwksp_init(&ws, workspace, workspaceSize); - cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); - if (cdict == NULL) return NULL; - ZSTD_cwksp_move(&cdict->workspace, &ws); - } - - DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", - (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); - if (workspaceSize < neededSize) return NULL; - - if (ZSTD_isError( ZSTD_initCDict_internal(cdict, - dict, dictSize, - dictLoadMethod, dictContentType, - cParams) )) - return NULL; - - return cdict; -} - -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) -{ - assert(cdict != NULL); - return cdict->matchState.cParams; -} - -/* ZSTD_compressBegin_usingCDict_advanced() : - * cdict must be != NULL */ -size_t ZSTD_compressBegin_usingCDict_advanced( - ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, - ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); - RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); - { ZSTD_CCtx_params params = cctx->requestedParams; - params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF - || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER - || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN - || cdict->compressionLevel == 0 ) - && (params.attachDictPref != ZSTD_dictForceLoad) ? - ZSTD_getCParamsFromCDict(cdict) - : ZSTD_getCParams(cdict->compressionLevel, - pledgedSrcSize, - cdict->dictContentSize); - /* Increase window log to fit the entire dictionary and source if the - * source size is known. Limit the increase to 19, which is the - * window log for compression level 1 with the largest source size. - */ - if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { - U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); - U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; - params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog); - } - params.fParams = fParams; - return ZSTD_compressBegin_internal(cctx, - NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_not_buffered); - } -} - -/* ZSTD_compressBegin_usingCDict() : - * pledgedSrcSize=0 means "unknown" - * if pledgedSrcSize>0, it will enable contentSizeFlag */ -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) -{ - ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); - return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); -} - -size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) -{ - FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -/*! ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. - * Note that compression parameters are decided at CDict creation time - * while frame parameters are hardcoded */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict) -{ - ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; - return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); -} - - - -/* ****************************************************************** -* Streaming -********************************************************************/ - -ZSTD_CStream* ZSTD_createCStream(void) -{ - DEBUGLOG(3, "ZSTD_createCStream"); - return ZSTD_createCStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) -{ - return ZSTD_initStaticCCtx(workspace, workspaceSize); -} - -ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) -{ /* CStream and CCtx are now same object */ - return ZSTD_createCCtx_advanced(customMem); -} - -size_t ZSTD_freeCStream(ZSTD_CStream* zcs) -{ - return ZSTD_freeCCtx(zcs); /* same object */ -} - - - -/*====== Initialization ======*/ - -size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } - -size_t ZSTD_CStreamOutSize(void) -{ - return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTDInternalConstants::ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; -} - -static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx, - const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType, - const ZSTD_CDict* const cdict, - ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_resetCStream_internal"); - /* Finalize the compression parameters */ - params.cParams = ZSTD_getCParamsFromCCtxParams(¶ms, pledgedSrcSize, dictSize); - /* params are supposed to be fully validated at this point */ - assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - - FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, - dict, dictSize, dictContentType, ZSTD_dtlm_fast, - cdict, - ¶ms, pledgedSrcSize, - ZSTDb_buffered) , ""); - - cctx->inToCompress = 0; - cctx->inBuffPos = 0; - cctx->inBuffTarget = cctx->blockSize - + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */ - cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; - cctx->streamStage = zcss_load; - cctx->frameEnded = 0; - return 0; /* ready to go */ -} - -/* ZSTD_resetCStream(): - * pledgedSrcSize == 0 means "unknown" */ -size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) -{ - /* temporary : 0 interpreted as "unknown" during transition period. - * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. - * 0 will be interpreted as "empty" in the future. - */ - U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - return 0; -} - -/*! ZSTD_initCStream_internal() : - * Note : for lib/compress only. Used by zstdmt_compress.c. - * Assumption 1 : params are valid - * Assumption 2 : either dict, or cdict, is defined, not both */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_initCStream_internal"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); - zcs->requestedParams = *params; - assert(!((dict) && (cdict))); /* either dict or cdict, not both */ - if (dict) { - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - } else { - /* Dictionary is cleared if !cdict */ - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - } - return 0; -} - -/* ZSTD_initCStream_usingCDict_advanced() : - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ -size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - zcs->requestedParams.fParams = fParams; - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - return 0; -} - -/* note : cdict must outlive compression session */ -size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); - return 0; -} - - -/* ZSTD_initCStream_advanced() : - * pledgedSrcSize must be exact. - * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ -size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, unsigned long long pss) -{ - /* for compatibility with older programs relying on this behavior. - * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. - * This line will be removed in the future. - */ - U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_initCStream_advanced"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); - zcs->requestedParams = ZSTD_assignParamsToCCtxParams(&zcs->requestedParams, ¶ms); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - return 0; -} - -size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_initCStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); - return 0; -} - -size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) -{ - /* temporary : 0 interpreted as "unknown" during transition period. - * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. - * 0 will be interpreted as "empty" in the future. - */ - U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; - DEBUGLOG(4, "ZSTD_initCStream_srcSize"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); - return 0; -} - -size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) -{ - DEBUGLOG(4, "ZSTD_initCStream"); - FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); - FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); - return 0; -} - -/*====== Compression ======*/ - -static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) -{ - size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; - if (hintInSize==0) hintInSize = cctx->blockSize; - return hintInSize; -} - -/** ZSTD_compressStream_generic(): - * internal function for all *compressStream*() variants - * non-static, because can be called from zstdmt_compress.c - * @return : hint size for next input */ -static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective const flushMode) -{ - const char* const istart = (const char*)input->src; - const char* const iend = input->size != 0 ? istart + input->size : istart; - const char* ip = input->pos != 0 ? istart + input->pos : istart; - char* const ostart = (char*)output->dst; - char* const oend = output->size != 0 ? ostart + output->size : ostart; - char* op = output->pos != 0 ? ostart + output->pos : ostart; - U32 someMoreWork = 1; - - /* check expectations */ - DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); - assert(zcs->inBuff != NULL); - assert(zcs->inBuffSize > 0); - assert(zcs->outBuff != NULL); - assert(zcs->outBuffSize > 0); - assert(output->pos <= output->size); - assert(input->pos <= input->size); - - while (someMoreWork) { - switch(zcs->streamStage) - { - case zcss_init: - RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); - - case zcss_load: - if ( (flushMode == ZSTD_e_end) - && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */ - && (zcs->inBuffPos == 0) ) { - /* shortcut to compression pass directly into output buffer */ - size_t const cSize = ZSTD_compressEnd(zcs, - op, oend-op, ip, iend-ip); - DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); - FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); - ip = iend; - op += cSize; - zcs->frameEnded = 1; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - someMoreWork = 0; break; - } - /* complete loading into inBuffer */ - { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy( - zcs->inBuff + zcs->inBuffPos, toLoad, - ip, iend-ip); - zcs->inBuffPos += loaded; - if (loaded != 0) - ip += loaded; - if ( (flushMode == ZSTD_e_continue) - && (zcs->inBuffPos < zcs->inBuffTarget) ) { - /* not enough input to fill full block : stop here */ - someMoreWork = 0; break; - } - if ( (flushMode == ZSTD_e_flush) - && (zcs->inBuffPos == zcs->inToCompress) ) { - /* empty */ - someMoreWork = 0; break; - } - } - /* compress current block (note : this stage cannot be stopped in the middle) */ - DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); - { void* cDst; - size_t cSize; - size_t const iSize = zcs->inBuffPos - zcs->inToCompress; - size_t oSize = oend-op; - unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); - if (oSize >= ZSTD_compressBound(iSize)) - cDst = op; /* compress into output buffer, to skip flush stage */ - else - cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = lastBlock ? - ZSTD_compressEnd(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize) : - ZSTD_compressContinue(zcs, cDst, oSize, - zcs->inBuff + zcs->inToCompress, iSize); - FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); - zcs->frameEnded = lastBlock; - /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; - if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; - DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", - (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); - if (!lastBlock) - assert(zcs->inBuffTarget <= zcs->inBuffSize); - zcs->inToCompress = zcs->inBuffPos; - if (cDst == op) { /* no need to flush */ - op += cSize; - if (zcs->frameEnded) { - DEBUGLOG(5, "Frame completed directly in outBuffer"); - someMoreWork = 0; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - } - break; - } - zcs->outBuffContentSize = cSize; - zcs->outBuffFlushedSize = 0; - zcs->streamStage = zcss_flush; /* pass-through to flush stage */ - } - /* fall-through */ - case zcss_flush: - DEBUGLOG(5, "flush stage"); - { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), - zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", - (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); - if (flushed) - op += flushed; - zcs->outBuffFlushedSize += flushed; - if (toFlush!=flushed) { - /* flush not fully completed, presumably because dst is too small */ - assert(op==oend); - someMoreWork = 0; - break; - } - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - if (zcs->frameEnded) { - DEBUGLOG(5, "Frame completed on flush"); - someMoreWork = 0; - ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - break; - } - zcs->streamStage = zcss_load; - break; - } - - default: /* impossible */ - assert(0); - } - } - - input->pos = ip - istart; - output->pos = op - ostart; - if (zcs->frameEnded) return 0; - return ZSTD_nextInputSizeHint(zcs); -} - -static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) -{ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers >= 1) { - assert(cctx->mtctx != NULL); - return ZSTDMT_nextInputSizeHint(cctx->mtctx); - } -#endif - return ZSTD_nextInputSizeHint(cctx); - -} - -size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); - return ZSTD_nextInputSizeHint_MTorST(zcs); -} - - -size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp) -{ - DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); - /* check conditions */ - RETURN_ERROR_IF(output->pos > output->size, GENERIC, "invalid buffer"); - RETURN_ERROR_IF(input->pos > input->size, GENERIC, "invalid buffer"); - assert(cctx!=NULL); - - /* transparent initialization stage */ - if (cctx->streamStage == zcss_init) { - ZSTD_CCtx_params params = cctx->requestedParams; - ZSTD_prefixDict const prefixDict = cctx->prefixDict; - FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ - memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ - assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ - DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); - if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1; /* auto-fix pledgedSrcSize */ - params.cParams = ZSTD_getCParamsFromCCtxParams( - &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/); - - -#ifdef ZSTD_MULTITHREAD - if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { - params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ - } - if (params.nbWorkers > 0) { - /* mt context creation */ - if (cctx->mtctx == NULL) { - DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", - params.nbWorkers); - cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem); - RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); - } - /* mt compression */ - DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); - FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( - cctx->mtctx, - prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, - cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); - cctx->streamStage = zcss_load; - cctx->appliedParams.nbWorkers = params.nbWorkers; - } else -#endif - { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, - prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, - cctx->cdict, - params, cctx->pledgedSrcSizePlusOne-1) , ""); - assert(cctx->streamStage == zcss_load); - assert(cctx->appliedParams.nbWorkers == 0); - } } - /* end of transparent initialization stage */ - - /* compression stage */ -#ifdef ZSTD_MULTITHREAD - if (cctx->appliedParams.nbWorkers > 0) { - int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); - size_t flushMin; - assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); - if (cctx->cParamsChanged) { - ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); - cctx->cParamsChanged = 0; - } - do { - flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); - if ( ZSTD_isError(flushMin) - || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - } - FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); - } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); - DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); - /* Either we don't require maximum forward progress, we've finished the - * flush, or we are out of output space. - */ - assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); - return flushMin; - } -#endif - FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); - DEBUGLOG(5, "completed ZSTD_compressStream2"); - return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ -} - -size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp) -{ - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; - ZSTD_inBuffer input = { src, srcSize, *srcPos }; - /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ - size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); - *dstPos = output.pos; - *srcPos = input.pos; - return cErr; -} - -size_t ZSTD_compress2(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); - ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); - { size_t oPos = 0; - size_t iPos = 0; - size_t const result = ZSTD_compressStream2_simpleArgs(cctx, - dst, dstCapacity, &oPos, - src, srcSize, &iPos, - ZSTD_e_end); - FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); - if (result != 0) { /* compression not completed, due to lack of output space */ - assert(oPos == dstCapacity); - RETURN_ERROR(dstSize_tooSmall, ""); - } - assert(iPos == srcSize); /* all input is expected consumed */ - return oPos; - } -} - -/*====== Finalize ======*/ - -/*! ZSTD_flushStream() : - * @return : amount of data remaining to flush */ -size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) -{ - ZSTD_inBuffer input = { NULL, 0, 0 }; - return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); -} - - -size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) -{ - ZSTD_inBuffer input = { NULL, 0, 0 }; - size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); - FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); - if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ - /* single thread mode : attempt to calculate remaining to flush more precisely */ - { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; - size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); - size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; - DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); - return toFlush; - } -} - - -/*-===== Pre-defined compression levels =====-*/ - -#define ZSTD_MAX_CLEVEL 22 -int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } -int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { -{ /* "default" - for any srcSize > 256 KB */ - /* W, C, H, S, L, TL, strat */ - { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ - { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ - { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ - { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ - { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ - { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ - { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ - { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ - { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ - { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ - { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ - { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ - { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ - { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ - { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ - { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ - { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ - { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ - { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ - { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ - { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ - { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ - { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ -}, -{ /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ - { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ - { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ - { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ - { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ - { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ - { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ - { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ - { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ - { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ - { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ - { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ - { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ - { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ - { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ - { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ - { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ - { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ - { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ - { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ - { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ - { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ - { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ - { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ - { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ - { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ - { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ - { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -{ /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ - { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ - { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ - { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ - { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ - { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ - { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ - { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ - { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ - { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ - { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ - { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ - { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ - { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ - { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ - { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ - { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ - { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ - { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ - { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ - { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ - { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ -}, -}; - -/*! ZSTD_getCParams_internal() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. - * Use dictSize == 0 for unknown or unused. */ -static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -{ - int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; - size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; - U64 const rSize = unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); - int row = compressionLevel; - DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); - if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ - if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ - if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; - { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; - if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ - /* refine parameters based on srcSize & dictSize */ - return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); - } -} - -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. - * Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) -{ - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); -} - -/*! ZSTD_getParams() : - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ -static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize); - DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); - memset(¶ms, 0, sizeof(params)); - params.cParams = cParams; - params.fParams.contentSizeFlag = 1; - return params; -} - -/*! ZSTD_getParams() : - * same idea as ZSTD_getCParams() - * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). - * Fields of `ZSTD_frameParameters` are set to default values */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { - if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; - return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp b/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp deleted file mode 100644 index ab9dfb459..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /*-************************************* - * Dependencies - ***************************************/ -#include "zstd/compress/zstd_compress_literals.h" - -namespace duckdb_zstd { -size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - memcpy(ostart + flSize, src, srcSize); - DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); - return srcSize + flSize; -} - -size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - ostart[flSize] = *(const BYTE*)src; - DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); - return flSize+1; -} - -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2) -{ - size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", - disableLiteralCompression, (U32)srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; - int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? - HUF_compress1X_repeat( - ostart+lhSize, dstCapacity-lhSize, src, srcSize, - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : - HUF_compress4X_repeat( - ostart+lhSize, dstCapacity-lhSize, src, srcSize, - HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, - (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ - DEBUGLOG(5, "Reusing previous huffman table"); - hType = set_repeat; - } - } - - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize==1) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } - - if (hType == set_compressed) { - /* using a newly constructed table */ - nextHuf->repeatMode = HUF_repeat_check; - } - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); - return lhSize+cLitSize; -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp b/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp deleted file mode 100644 index e1cc14597..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /*-************************************* - * Dependencies - ***************************************/ -#include "zstd/compress/zstd_compress_sequences.h" - -namespace duckdb_zstd { -/** - * -log2(x / 256) lookup table for x in [0, 256). - * If x == 0: Return 0 - * Else: Return floor(-log2(x / 256) * 256) - */ -static unsigned const kInverseProbabilityLog256[256] = { - 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, - 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, - 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, - 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, - 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, - 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, - 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, - 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, - 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, - 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, - 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, - 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, - 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, - 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, - 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, - 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, - 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, - 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, - 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, - 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, - 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, - 5, 4, 2, 1, -}; - -static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { - void const* ptr = ctable; - U16 const* u16ptr = (U16 const*)ptr; - U32 const maxSymbolValue = MEM_read16(u16ptr + 1); - return maxSymbolValue; -} - -/** - * Returns the cost in bytes of encoding the normalized count header. - * Returns an error if any of the helper functions return an error. - */ -static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, - size_t const nbSeq, unsigned const FSELog) -{ - BYTE wksp[FSE_NCOUNTBOUND]; - S16 norm[MaxSeq + 1]; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max), ""); - return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); -} - -/** - * Returns the cost in bits of encoding the distribution described by count - * using the entropy bound. - */ -static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) -{ - unsigned cost = 0; - unsigned s; - for (s = 0; s <= max; ++s) { - unsigned norm = (unsigned)((256 * count[s]) / total); - if (count[s] != 0 && norm == 0) - norm = 1; - assert(count[s] < total); - cost += count[s] * kInverseProbabilityLog256[norm]; - } - return cost >> 8; -} - -/** - * Returns the cost in bits of encoding the distribution in count using ctable. - * Returns an error if ctable cannot represent all the symbols in count. - */ -size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max) -{ - unsigned const kAccuracyLog = 8; - size_t cost = 0; - unsigned s; - FSE_CState_t cstate; - FSE_initCState(&cstate, ctable); - if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { - DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", - ZSTD_getFSEMaxSymbolValue(ctable), max); - return ERROR(GENERIC); - } - for (s = 0; s <= max; ++s) { - unsigned const tableLog = cstate.stateLog; - unsigned const badCost = (tableLog + 1) << kAccuracyLog; - unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); - if (count[s] == 0) - continue; - if (bitCost >= badCost) { - DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); - return ERROR(GENERIC); - } - cost += (size_t)count[s] * bitCost; - } - return cost >> kAccuracyLog; -} - -/** - * Returns the cost in bits of encoding the distribution in count using the - * table described by norm. The max symbol support by norm is assumed >= max. - * norm must be valid for every symbol with non-zero probability in count. - */ -size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) -{ - unsigned const shift = 8 - accuracyLog; - size_t cost = 0; - unsigned s; - assert(accuracyLog <= 8); - for (s = 0; s <= max; ++s) { - unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; - unsigned const norm256 = normAcc << shift; - assert(norm256 > 0); - assert(norm256 < 256); - cost += count[s] * kInverseProbabilityLog256[norm256]; - } - return cost >> 8; -} - -symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy) -{ - ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { - /* Prefer set_basic over set_rle when there are 2 or less symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. - */ - DEBUGLOG(5, "Selected set_basic"); - return set_basic; - } - DEBUGLOG(5, "Selected set_rle"); - return set_rle; - } - if (strategy < ZSTD_lazy) { - if (isDefaultAllowed) { - size_t const staticFse_nbSeq_max = 1000; - size_t const mult = 10 - strategy; - size_t const baseLog = 3; - size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ - assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ - assert(mult <= 9 && mult >= 7); - if ( (*repeatMode == FSE_repeat_valid) - && (nbSeq < staticFse_nbSeq_max) ) { - DEBUGLOG(5, "Selected set_repeat"); - return set_repeat; - } - if ( (nbSeq < dynamicFse_nbSeq_min) - || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { - DEBUGLOG(5, "Selected set_basic"); - /* The format allows default tables to be repeated, but it isn't useful. - * When using simple heuristics to select encoding type, we don't want - * to confuse these tables with dictionaries. When running more careful - * analysis, we don't need to waste time checking both repeating tables - * and default tables. - */ - *repeatMode = FSE_repeat_none; - return set_basic; - } - } - } else { - size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); - size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); - size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); - size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); - - if (isDefaultAllowed) { - assert(!ZSTD_isError(basicCost)); - assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); - } - assert(!ZSTD_isError(NCountCost)); - assert(compressedCost < ERROR(maxCode)); - DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", - (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); - if (basicCost <= repeatCost && basicCost <= compressedCost) { - DEBUGLOG(5, "Selected set_basic"); - assert(isDefaultAllowed); - *repeatMode = FSE_repeat_none; - return set_basic; - } - if (repeatCost <= compressedCost) { - DEBUGLOG(5, "Selected set_repeat"); - assert(!ZSTD_isError(repeatCost)); - return set_repeat; - } - assert(compressedCost < basicCost && compressedCost < repeatCost); - } - DEBUGLOG(5, "Selected set_compressed"); - *repeatMode = FSE_repeat_check; - return set_compressed; -} - -size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* entropyWorkspace, size_t entropyWorkspaceSize) -{ - BYTE* op = (BYTE*)dst; - const BYTE* const oend = op + dstCapacity; - DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); - - switch (type) { - case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); - *op = codeTable[0]; - return 1; - case set_repeat: - memcpy(nextCTable, prevCTable, prevCTableSize); - return 0; - case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ - return 0; - case set_compressed: { - S16 norm[MaxSeq + 1]; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - if (count[codeTable[nbSeq-1]] > 1) { - count[codeTable[nbSeq-1]]--; - nbSeq_1--; - } - assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max), ""); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), ""); - return NCountSize; - } - } - default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); - } -} - -FORCE_INLINE_TEMPLATE size_t -ZSTD_encodeSequences_body( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - RETURN_ERROR_IF( - ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), - dstSize_tooSmall, "not enough space remaining"); - DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", - (int)(blockStream.endPtr - blockStream.startPtr), - (unsigned)dstCapacity); - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, ZSTDInternalConstants::LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ZSTDInternalConstants::ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq-1]; - unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, - ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); - } - BIT_flushBits(&blockStream); - - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); - if (longOffsets) { - unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, - ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); - } } - - DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); - FSE_flushCState(&blockStream, &stateMatchLength); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); - FSE_flushCState(&blockStream, &stateOffsetBits); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); - FSE_flushCState(&blockStream, &stateLitLength); - - { size_t const streamSize = BIT_closeCStream(&blockStream); - RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); - return streamSize; - } -} - -static size_t -ZSTD_encodeSequences_default( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_encodeSequences_bmi2( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - -#endif - -size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) -{ - DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); -#if DYNAMIC_BMI2 - if (bmi2) { - return ZSTD_encodeSequences_bmi2(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); - } -#endif - (void)bmi2; - return ZSTD_encodeSequences_default(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp b/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp deleted file mode 100644 index 559a3a0cd..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +++ /dev/null @@ -1,842 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - /*-************************************* - * Dependencies - ***************************************/ -#include "zstd/compress/zstd_compress_superblock.h" - -#include "zstd/common/zstd_internal.h" /* ZSTD_getSequenceLength */ -#include "zstd/compress/hist.h" /* HIST_countFast_wksp */ -#include "zstd/compress/zstd_compress_internal.h" -#include "zstd/compress/zstd_compress_sequences.h" -#include "zstd/compress/zstd_compress_literals.h" - -namespace duckdb_zstd { -/*-************************************* -* Superblock entropy buffer structs -***************************************/ -/** ZSTD_hufCTablesMetadata_t : - * Stores Literals Block Type for a super-block in hType, and - * huffman tree description in hufDesBuffer. - * hufDesSize refers to the size of huffman tree description in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ -typedef struct { - symbolEncodingType_e hType; - BYTE hufDesBuffer[500]; /* TODO give name to this value */ - size_t hufDesSize; -} ZSTD_hufCTablesMetadata_t; - -/** ZSTD_fseCTablesMetadata_t : - * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and - * fse tables in fseTablesBuffer. - * fseTablesSize refers to the size of fse tables in bytes. - * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ -typedef struct { - symbolEncodingType_e llType; - symbolEncodingType_e ofType; - symbolEncodingType_e mlType; - BYTE fseTablesBuffer[500]; /* TODO give name to this value */ - size_t fseTablesSize; - size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ -} ZSTD_fseCTablesMetadata_t; - -typedef struct { - ZSTD_hufCTablesMetadata_t hufMetadata; - ZSTD_fseCTablesMetadata_t fseMetadata; -} ZSTD_entropyCTablesMetadata_t; - - -/** ZSTD_buildSuperBlockEntropy_literal() : - * Builds entropy for the super-block literals. - * Stores literals block type (raw, rle, compressed, repeat) and - * huffman description table to hufMetadata. - * @return : size of huffman description table or error code */ -static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, - const ZSTD_hufCTables_t* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_hufCTablesMetadata_t* hufMetadata, - const int disableLiteralsCompression, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); - BYTE* const nodeWksp = countWkspStart + countWkspSize; - const size_t nodeWkspSize = wkspEnd-nodeWksp; - unsigned maxSymbolValue = 255; - unsigned huffLog = HUF_TABLELOG_DEFAULT; - HUF_repeat repeat = prevHuf->repeatMode; - - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); - - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralsCompression) { - DEBUGLOG(5, "set_basic - disabled"); - hufMetadata->hType = set_basic; - return 0; - } - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) { - DEBUGLOG(5, "set_basic - too small"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Scan input and build symbol stats */ - { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); - FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); - if (largest == srcSize) { - DEBUGLOG(5, "set_rle"); - hufMetadata->hType = set_rle; - return 0; - } - if (largest <= (srcSize >> 7)+4) { - DEBUGLOG(5, "set_basic - no gain"); - hufMetadata->hType = set_basic; - return 0; - } - } - - /* Validate the previous Huffman table */ - if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { - repeat = HUF_repeat_none; - } - - /* Build Huffman Tree */ - memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, - maxSymbolValue, huffLog, - nodeWksp, nodeWkspSize); - FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); - huffLog = (U32)maxBits; - { /* Build and write the CTable */ - size_t const newCSize = HUF_estimateCompressedSize( - (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); - size_t const hSize = HUF_writeCTable( - hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), - (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog); - /* Check against repeating the previous CTable */ - if (repeat != HUF_repeat_none) { - size_t const oldCSize = HUF_estimateCompressedSize( - (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); - if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { - DEBUGLOG(5, "set_repeat - smaller"); - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_repeat; - return 0; - } - } - if (newCSize + hSize >= srcSize) { - DEBUGLOG(5, "set_basic - no gains"); - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - hufMetadata->hType = set_basic; - return 0; - } - DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); - hufMetadata->hType = set_compressed; - nextHuf->repeatMode = HUF_repeat_check; - return hSize; - } - } -} - -/** ZSTD_buildSuperBlockEntropy_sequences() : - * Builds entropy for the super-block sequences. - * Stores symbol compression modes and fse table to fseMetadata. - * @return : size of fse tables or error code */ -static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, - const ZSTD_fseCTables_t* prevEntropy, - ZSTD_fseCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize) -{ - BYTE* const wkspStart = (BYTE*)workspace; - BYTE* const wkspEnd = wkspStart + wkspSize; - BYTE* const countWkspStart = wkspStart; - unsigned* const countWksp = (unsigned*)workspace; - const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); - BYTE* const cTableWksp = countWkspStart + countWkspSize; - const size_t cTableWkspSize = wkspEnd-cTableWksp; - ZSTD_strategy const strategy = cctxParams->cParams.strategy; - FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; - FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; - FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; - const BYTE* const ofCodeTable = seqStorePtr->ofCode; - const BYTE* const llCodeTable = seqStorePtr->llCode; - const BYTE* const mlCodeTable = seqStorePtr->mlCode; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE* const ostart = fseMetadata->fseTablesBuffer; - BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); - BYTE* op = ostart; - - assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); - memset(workspace, 0, wkspSize); - - fseMetadata->lastCountSize = 0; - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - /* build CTable for Literal Lengths */ - { U32 LLtype; - unsigned max = MaxLL; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building LL table"); - nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; - LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - LLFSELog, prevEntropy->litlengthCTable, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(set_basic < set_compressed && set_rle < set_compressed); - assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, - countWksp, max, llCodeTable, nbSeq, ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, MaxLL, - prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); - if (LLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->llType = (symbolEncodingType_e) LLtype; - } } - /* build CTable for Offsets */ - { U32 Offtype; - unsigned max = MaxOff; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ - ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; - DEBUGLOG(5, "Building OF table"); - nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; - Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, - countWksp, max, mostFrequent, nbSeq, - OffFSELog, prevEntropy->offcodeCTable, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - defaultPolicy, strategy); - assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, - countWksp, max, ofCodeTable, nbSeq, ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, DefaultMaxOff, - prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); - if (Offtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->ofType = (symbolEncodingType_e) Offtype; - } } - /* build CTable for MatchLengths */ - { U32 MLtype; - unsigned max = MaxML; - size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ - DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); - nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; - MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, - countWksp, max, mostFrequent, nbSeq, - MLFSELog, prevEntropy->matchlengthCTable, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - ZSTD_defaultAllowed, strategy); - assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, - countWksp, max, mlCodeTable, nbSeq, ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, MaxML, - prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), - cTableWksp, cTableWkspSize); - FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); - if (MLtype == set_compressed) - fseMetadata->lastCountSize = countSize; - op += countSize; - fseMetadata->mlType = (symbolEncodingType_e) MLtype; - } } - assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); - return op-ostart; -} - - -/** ZSTD_buildSuperBlockEntropy() : - * Builds entropy for the super-block. - * @return : 0 on success or error code */ -static size_t -ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, - const ZSTD_entropyCTables_t* prevEntropy, - ZSTD_entropyCTables_t* nextEntropy, - const ZSTD_CCtx_params* cctxParams, - ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize) -{ - size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; - DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); - entropyMetadata->hufMetadata.hufDesSize = - ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, - &prevEntropy->huf, &nextEntropy->huf, - &entropyMetadata->hufMetadata, - ZSTD_disableLiteralsCompression(cctxParams), - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); - entropyMetadata->fseMetadata.fseTablesSize = - ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, - &prevEntropy->fse, &nextEntropy->fse, - cctxParams, - &entropyMetadata->fseMetadata, - workspace, wkspSize); - FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); - return 0; -} - -/** ZSTD_compressSubBlock_literal() : - * Compresses literals section for a sub-block. - * When we have to write the Huffman table we will sometimes choose a header - * size larger than necessary. This is because we have to pick the header size - * before we know the table size + compressed size, so we have a bound on the - * table size. If we guessed incorrectly, we fall back to uncompressed literals. - * - * We write the header when writeEntropy=1 and set entropyWrriten=1 when we succeeded - * in writing the header, otherwise it is set to 0. - * - * hufMetadata->hType has literals block type info. - * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. - * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. - * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block - * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block - * and the following sub-blocks' literals sections will be Treeless_Literals_Block. - * @return : compressed size of literals section of a sub-block - * Or 0 if it unable to compress. - * Or error code */ -static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - const BYTE* literals, size_t litSize, - void* dst, size_t dstSize, - const int bmi2, int writeEntropy, int* entropyWritten) -{ - size_t const header = writeEntropy ? 200 : 0; - size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstSize; - BYTE* op = ostart + lhSize; - U32 const singleStream = lhSize == 3; - symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; - size_t cLitSize = 0; - - (void)bmi2; /* TODO bmi2... */ - - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); - - *entropyWritten = 0; - if (litSize == 0 || hufMetadata->hType == set_basic) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } else if (hufMetadata->hType == set_rle) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); - return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); - } - - assert(litSize > 0); - assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); - - if (writeEntropy && hufMetadata->hType == set_compressed) { - memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); - op += hufMetadata->hufDesSize; - cLitSize += hufMetadata->hufDesSize; - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); - } - - /* TODO bmi2 */ - { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) - : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); - op += cSize; - cLitSize += cSize; - if (cSize == 0 || ERR_isError(cSize)) { - DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); - return 0; - } - /* If we expand and we aren't writing a header then emit uncompressed */ - if (!writeEntropy && cLitSize >= litSize) { - DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } - /* If we are writing headers then allow expansion that doesn't change our header size. */ - if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { - assert(cLitSize > litSize); - DEBUGLOG(5, "Literals expanded beyond allowed header size"); - return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); - } - DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); - } - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - *entropyWritten = 1; - DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); - return op-ostart; -} - -static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { - const seqDef* const sstart = sequences; - const seqDef* const send = sequences + nbSeq; - const seqDef* sp = sstart; - size_t matchLengthSum = 0; - while (send-sp > 0) { - ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); - matchLengthSum += seqLen.matchLength; - sp++; - } - return matchLengthSum + litSize; -} - -/** ZSTD_compressSubBlock_sequences() : - * Compresses sequences section for a sub-block. - * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have - * symbol compression modes for the super-block. - * The first successfully compressed block will have these in its header. - * We set entropyWritten=1 when we succeed in compressing the sequences. - * The following sub-blocks will always have repeat mode. - * @return : compressed size of sequences section of a sub-block - * Or 0 if it is unable to compress - * Or error code. */ -static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - const seqDef* sequences, size_t nbSeq, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const int bmi2, int writeEntropy, int* entropyWritten) -{ - const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - BYTE* seqHead; - - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); - - *entropyWritten = 0; - /* Sequences Header */ - RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, - dstSize_tooSmall, ""); - if (nbSeq < 0x7F) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; - else - op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; - if (nbSeq==0) { - return op - ostart; - } - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); - - if (writeEntropy) { - const U32 LLtype = fseMetadata->llType; - const U32 Offtype = fseMetadata->ofType; - const U32 MLtype = fseMetadata->mlType; - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); - *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); - memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); - op += fseMetadata->fseTablesSize; - } else { - const U32 repeat = set_repeat; - *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); - } - - { size_t const bitstreamSize = ZSTD_encodeSequences( - op, oend - op, - fseTables->matchlengthCTable, mlCode, - fseTables->offcodeCTable, ofCode, - fseTables->litlengthCTable, llCode, - sequences, nbSeq, - longOffsets, bmi2); - FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); - op += bitstreamSize; - /* zstd versions <= 1.3.4 mistakenly report corruption when - * FSE_readNCount() receives a buffer < 4 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1146. - * This can happen when the last set_compressed table present is 2 - * bytes and the bitstream is only one byte. - * In this exceedingly rare case, we will simply emit an uncompressed - * block, since it isn't worth optimizing. - */ -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { - /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ - assert(fseMetadata->lastCountSize + bitstreamSize == 3); - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " - "emitting an uncompressed block."); - return 0; - } -#endif - DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); - } - - /* zstd versions <= 1.4.0 mistakenly report error when - * sequences section body size is less than 3 bytes. - * Fixed by https://github.com/facebook/zstd/pull/1664. - * This can happen when the previous sequences section block is compressed - * with rle mode and the current block's sequences section is compressed - * with repeat mode where sequences section body size can be 1 byte. - */ -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (op-seqHead < 4) { - DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " - "an uncompressed block when sequences are < 4 bytes"); - return 0; - } -#endif - - *entropyWritten = 1; - return op - ostart; -} - -/** ZSTD_compressSubBlock() : - * Compresses a single sub-block. - * @return : compressed size of the sub-block - * Or 0 if it failed to compress. */ -static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - const seqDef* sequences, size_t nbSeq, - const BYTE* literals, size_t litSize, - const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const int bmi2, - int writeLitEntropy, int writeSeqEntropy, - int* litEntropyWritten, int* seqEntropyWritten, - U32 lastBlock) -{ - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart + ZSTDInternalConstants::ZSTD_blockHeaderSize; - DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", - litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); - { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, - &entropyMetadata->hufMetadata, literals, litSize, - op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); - FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); - if (cLitSize == 0) return 0; - op += cLitSize; - } - { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, - &entropyMetadata->fseMetadata, - sequences, nbSeq, - llCode, mlCode, ofCode, - cctxParams, - op, oend-op, - bmi2, writeSeqEntropy, seqEntropyWritten); - FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); - if (cSeqSize == 0) return 0; - op += cSeqSize; - } - /* Write block header */ - { size_t cSize = (op-ostart)-ZSTDInternalConstants::ZSTD_blockHeaderSize; - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); - MEM_writeLE24(ostart, cBlockHeader24); - } - return op-ostart; -} - -static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, - const ZSTD_hufCTables_t* huf, - const ZSTD_hufCTablesMetadata_t* hufMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - unsigned* const countWksp = (unsigned*)workspace; - unsigned maxSymbolValue = 255; - size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - - if (hufMetadata->hType == set_basic) return litSize; - else if (hufMetadata->hType == set_rle) return 1; - else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { - size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); - if (ZSTD_isError(largest)) return litSize; - { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); - if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; - return cLitSizeEstimate + literalSectionHeaderSize; - } } - assert(0); /* impossible */ - return 0; -} - -static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, - const BYTE* codeTable, unsigned maxCode, - size_t nbSeq, const FSE_CTable* fseCTable, - const U32* additionalBits, - short const* defaultNorm, U32 defaultNormLog, - void* workspace, size_t wkspSize) -{ - unsigned* const countWksp = (unsigned*)workspace; - const BYTE* ctp = codeTable; - const BYTE* const ctStart = ctp; - const BYTE* const ctEnd = ctStart + nbSeq; - size_t cSymbolTypeSizeEstimateInBits = 0; - unsigned max = maxCode; - - HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ - if (type == set_basic) { - cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); - } else if (type == set_rle) { - cSymbolTypeSizeEstimateInBits = 0; - } else if (type == set_compressed || type == set_repeat) { - cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); - } - if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; - while (ctp < ctEnd) { - if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; - else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ - ctp++; - } - return cSymbolTypeSizeEstimateInBits / 8; -} - -static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_fseCTables_t* fseTables, - const ZSTD_fseCTablesMetadata_t* fseMetadata, - void* workspace, size_t wkspSize, - int writeEntropy) -{ - size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ - size_t cSeqSizeEstimate = 0; - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, - nbSeq, fseTables->offcodeCTable, NULL, - ZSTDInternalConstants::OF_defaultNorm, ZSTDInternalConstants::OF_defaultNormLog, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, - nbSeq, fseTables->litlengthCTable, ZSTDInternalConstants::LL_bits, - ZSTDInternalConstants::LL_defaultNorm, ZSTDInternalConstants::LL_defaultNormLog, - workspace, wkspSize); - cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, - nbSeq, fseTables->matchlengthCTable, ZSTDInternalConstants::ML_bits, - ZSTDInternalConstants::ML_defaultNorm, ZSTDInternalConstants::ML_defaultNormLog, - workspace, wkspSize); - if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; - return cSeqSizeEstimate + sequencesSectionHeaderSize; -} - -static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, - const BYTE* ofCodeTable, - const BYTE* llCodeTable, - const BYTE* mlCodeTable, - size_t nbSeq, - const ZSTD_entropyCTables_t* entropy, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { - size_t cSizeEstimate = 0; - cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, - &entropy->huf, &entropyMetadata->hufMetadata, - workspace, wkspSize, writeLitEntropy); - cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, - nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, - workspace, wkspSize, writeSeqEntropy); - return cSizeEstimate + ZSTDInternalConstants::ZSTD_blockHeaderSize; -} - -static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) -{ - if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) - return 1; - if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) - return 1; - if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) - return 1; - return 0; -} - -/** ZSTD_compressSubBlock_multi() : - * Breaks super-block into multiple sub-blocks and compresses them. - * Entropy will be written to the first block. - * The following blocks will use repeat mode to compress. - * All sub-blocks are compressed blocks (no raw or rle blocks). - * @return : compressed size of the super block (which is multiple ZSTD blocks) - * Or 0 if it failed to compress. */ -static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, - const ZSTD_compressedBlockState_t* prevCBlock, - ZSTD_compressedBlockState_t* nextCBlock, - const ZSTD_entropyCTablesMetadata_t* entropyMetadata, - const ZSTD_CCtx_params* cctxParams, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const int bmi2, U32 lastBlock, - void* workspace, size_t wkspSize) -{ - const seqDef* const sstart = seqStorePtr->sequencesStart; - const seqDef* const send = seqStorePtr->sequences; - const seqDef* sp = sstart; - const BYTE* const lstart = seqStorePtr->litStart; - const BYTE* const lend = seqStorePtr->lit; - const BYTE* lp = lstart; - BYTE const* ip = (BYTE const*)src; - BYTE const* const iend = ip + srcSize; - BYTE* const ostart = (BYTE*)dst; - BYTE* const oend = ostart + dstCapacity; - BYTE* op = ostart; - const BYTE* llCodePtr = seqStorePtr->llCode; - const BYTE* mlCodePtr = seqStorePtr->mlCode; - const BYTE* ofCodePtr = seqStorePtr->ofCode; - size_t targetCBlockSize = cctxParams->targetCBlockSize; - size_t litSize, seqCount; - int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; - int writeSeqEntropy = 1; - int lastSequence = 0; - - DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", - (unsigned)(lend-lp), (unsigned)(send-sstart)); - - litSize = 0; - seqCount = 0; - do { - size_t cBlockSizeEstimate = 0; - if (sstart == send) { - lastSequence = 1; - } else { - const seqDef* const sequence = sp + seqCount; - lastSequence = sequence == send - 1; - litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; - seqCount++; - } - if (lastSequence) { - assert(lp <= lend); - assert(litSize <= (size_t)(lend - lp)); - litSize = (size_t)(lend - lp); - } - /* I think there is an optimization opportunity here. - * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful - * since it recalculates estimate from scratch. - * For example, it would recount literal distribution and symbol codes everytime. - */ - cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, - &nextCBlock->entropy, entropyMetadata, - workspace, wkspSize, writeLitEntropy, writeSeqEntropy); - if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { - int litEntropyWritten = 0; - int seqEntropyWritten = 0; - const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); - const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, - sp, seqCount, - lp, litSize, - llCodePtr, mlCodePtr, ofCodePtr, - cctxParams, - op, oend-op, - bmi2, writeLitEntropy, writeSeqEntropy, - &litEntropyWritten, &seqEntropyWritten, - lastBlock && lastSequence); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); - if (cSize > 0 && cSize < decompressedSize) { - DEBUGLOG(5, "Committed the sub-block"); - assert(ip + decompressedSize <= iend); - ip += decompressedSize; - sp += seqCount; - lp += litSize; - op += cSize; - llCodePtr += seqCount; - mlCodePtr += seqCount; - ofCodePtr += seqCount; - litSize = 0; - seqCount = 0; - /* Entropy only needs to be written once */ - if (litEntropyWritten) { - writeLitEntropy = 0; - } - if (seqEntropyWritten) { - writeSeqEntropy = 0; - } - } - } - } while (!lastSequence); - if (writeLitEntropy) { - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); - memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); - } - if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { - /* If we haven't written our entropy tables, then we've violated our contract and - * must emit an uncompressed block. - */ - DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); - return 0; - } - if (ip < iend) { - size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); - DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); - FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); - assert(cSize != 0); - op += cSize; - /* We have to regenerate the repcodes because we've skipped some sequences */ - if (sp < send) { - seqDef const* seq; - repcodes_t rep; - memcpy(&rep, prevCBlock->rep, sizeof(rep)); - for (seq = sstart; seq < sp; ++seq) { - rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); - } - memcpy(nextCBlock->rep, &rep, sizeof(rep)); - } - } - DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); - return op-ostart; -} - -size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - void const* src, size_t srcSize, - unsigned lastBlock) { - ZSTD_entropyCTablesMetadata_t entropyMetadata; - - FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, - &zc->blockState.prevCBlock->entropy, - &zc->blockState.nextCBlock->entropy, - &zc->appliedParams, - &entropyMetadata, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); - - return ZSTD_compressSubBlock_multi(&zc->seqStore, - zc->blockState.prevCBlock, - zc->blockState.nextCBlock, - &entropyMetadata, - &zc->appliedParams, - dst, dstCapacity, - src, srcSize, - zc->bmi2, lastBlock, - zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp b/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp deleted file mode 100644 index ecc1cdb77..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +++ /dev/null @@ -1,524 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include "zstd/compress/zstd_compress_internal.h" -#include "zstd/compress/zstd_double_fast.h" - -namespace duckdb_zstd { - -void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashLarge = ms->hashTable; - U32 const hBitsL = cParams->hashLog; - U32 const mls = cParams->minMatch; - U32* const hashSmall = ms->chainTable; - U32 const hBitsS = cParams->chainLog; - const BYTE* const base = ms->window.base; - const BYTE* ip = base + ms->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; - - /* Always insert every fastHashFillStep position into the hash tables. - * Insert the other positions into the large hash table if their entry - * is empty. - */ - for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); - U32 i; - for (i = 0; i < fastHashFillStep; ++i) { - size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); - size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); - if (i == 0) - hashSmall[smHash] = current + i; - if (i == 0 || hashLarge[lgHash] == 0) - hashLarge[lgHash] = current + i; - /* Only load extra positions for ZSTD_dtlm_full */ - if (dtlm == ZSTD_dtlm_fast) - break; - } } -} - - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_doubleFast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */, ZSTD_dictMode_e const dictMode) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32* const hashLong = ms->hashTable; - const U32 hBitsL = cParams->hashLog; - U32* const hashSmall = ms->chainTable; - const U32 hBitsS = cParams->chainLog; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - /* presumes that, if there is a dictionary, it must be using Attach mode */ - const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixLowest = base + prefixLowestIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = - dictMode == ZSTD_dictMatchState ? - &dms->cParams : NULL; - const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? - dms->hashTable : NULL; - const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? - dms->chainTable : NULL; - const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? - dictBase + dictStartIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? - dictCParams->hashLog : hBitsL; - const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? - dictCParams->chainLog : hBitsS; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); - - assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); - - /* if a dictionary is attached, it must be within window range */ - if (dictMode == ZSTD_dictMatchState) { - assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); - } - - /* init */ - ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - U32 offset; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); - size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndexL = hashLong[h2]; - U32 matchIndexS = hashSmall[h]; - const BYTE* matchLong = base + matchIndexL; - const BYTE* match = base + matchIndexS; - const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - hashLong[h2] = hashSmall[h] = current; /* update hash tables */ - - /* check dictMatchState repcode */ - if (dictMode == ZSTD_dictMatchState - && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - - /* check noDict repcode */ - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - goto _match_stored; - } - - if (matchIndexL > prefixLowestIndex) { - /* check prefix long match */ - if (MEM_read64(matchLong) == MEM_read64(ip)) { - mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; - offset = (U32)(ip-matchLong); - while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - goto _match_found; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dictMatchState long match */ - U32 const dictMatchIndexL = dictHashLong[dictHL]; - const BYTE* dictMatchL = dictBase + dictMatchIndexL; - assert(dictMatchL < dictEnd); - - if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { - mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; - offset = (U32)(current - dictMatchIndexL - dictIndexDelta); - while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ - goto _match_found; - } } - - if (matchIndexS > prefixLowestIndex) { - /* check prefix short match */ - if (MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dictMatchState short match */ - U32 const dictMatchIndexS = dictHashSmall[dictHS]; - match = dictBase + dictMatchIndexS; - matchIndexS = dictMatchIndexS + dictIndexDelta; - - if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { - goto _search_next_long; - } } - - ip += ((ip-anchor) >> kSearchStrength) + 1; -#if defined(__aarch64__) - PREFETCH_L1(ip+256); -#endif - continue; - -_search_next_long: - - { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); - U32 const matchIndexL3 = hashLong[hl3]; - const BYTE* matchL3 = base + matchIndexL3; - hashLong[hl3] = current + 1; - - /* check prefix long +1 match */ - if (matchIndexL3 > prefixLowestIndex) { - if (MEM_read64(matchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; - ip++; - offset = (U32)(ip-matchL3); - while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ - goto _match_found; - } - } else if (dictMode == ZSTD_dictMatchState) { - /* check dict long +1 match */ - U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; - const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; - assert(dictMatchL3 < dictEnd); - if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { - mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; - ip++; - offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); - while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ - goto _match_found; - } } } - - /* if no long +1 match, explore the short match we found */ - if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { - mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; - offset = (U32)(current - matchIndexS); - while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } else { - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - offset = (U32)(ip - match); - while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - - /* fall-through */ - -_match_found: - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - -_match_stored: - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } - - /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState - && repIndex2 < prefixLowestIndex ? - dictBase + repIndex2 - dictIndexDelta : - base + repIndex2; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } - - if (dictMode == ZSTD_noDict) { - while ( (ip <= ilimit) - && ( (offset_2>0) - & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - } /* while (ip < ilimit) */ - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const U32 mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); - case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); - case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); - case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); - } -} - - -size_t ZSTD_compressBlock_doubleFast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const U32 mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); - case 5 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); - case 6 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); - case 7 : - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); - } -} - - -static size_t ZSTD_compressBlock_doubleFast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls /* template */) -{ - ZSTD_compressionParameters const* cParams = &ms->cParams; - U32* const hashLong = ms->hashTable; - U32 const hBitsL = cParams->hashLog; - U32* const hashSmall = ms->chainTable; - U32 const hBitsS = cParams->chainLog; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); - const U32 dictStartIndex = lowLimit; - const U32 dictLimit = ms->window.dictLimit; - const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dictBase + prefixStartIndex; - U32 offset_1=rep[0], offset_2=rep[1]; - - DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); - - /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; - const BYTE* matchLong = matchLongBase + matchLongIndex; - - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */ - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ - - if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - } else { - if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { - const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; - U32 offset; - mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; - offset = current - matchLongIndex; - while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; - const BYTE* match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = current + 1; - if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { - const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; - mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; - ip++; - offset = current+1 - matchIndex3; - while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ - } else { - const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - offset = current - matchIndex; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - - } else { - ip += ((ip-anchor) >> kSearchStrength) + 1; - continue; - } } - - /* move to next sequence start */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Complementary insertion */ - /* done after iLimit test, as candidates could be > iend-8 */ - { U32 const indexToInsert = current+2; - hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); - hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; - hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); - } - - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_fast.cpp b/src/duckdb/third_party/zstd/compress/zstd_fast.cpp deleted file mode 100644 index 31da71d85..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +++ /dev/null @@ -1,499 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include "zstd/compress/zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ -#include "zstd/compress/zstd_fast.h" - -namespace duckdb_zstd { - -void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - const void* const end, - ZSTD_dictTableLoadMethod_e dtlm) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hBits = cParams->hashLog; - U32 const mls = cParams->minMatch; - const BYTE* const base = ms->window.base; - const BYTE* ip = base + ms->nextToUpdate; - const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; - const U32 fastHashFillStep = 3; - - /* Always insert every fastHashFillStep position into the hash table. - * Insert the other positions if their hash entry is empty. - */ - for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { - U32 const current = (U32)(ip - base); - size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); - hashTable[hash0] = current; - if (dtlm == ZSTD_dtlm_fast) continue; - /* Only load extra positions for ZSTD_dtlm_full */ - { U32 p; - for (p = 1; p < fastHashFillStep; ++p) { - size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); - if (hashTable[hash] == 0) { /* not yet filled */ - hashTable[hash] = current + p; - } } } } -} - - -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_fast_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, - U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ - const BYTE* ip0 = istart; - const BYTE* ip1; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); - ip0 += (ip0 == prefixStart); - ip1 = ip0 + 1; - { U32 const current = (U32)(ip0 - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ -#ifdef __INTEL_COMPILER - /* From intel 'The vector pragma indicates that the loop should be - * vectorized if it is legal to do so'. Can be used together with - * #pragma ivdep (but have opted to exclude that because intel - * warns against using it).*/ - #pragma vector always -#endif - while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ - size_t mLength; - BYTE const* ip2 = ip0 + 2; - size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); - U32 const val0 = MEM_read32(ip0); - size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); - U32 const val1 = MEM_read32(ip1); - U32 const current0 = (U32)(ip0-base); - U32 const current1 = (U32)(ip1-base); - U32 const matchIndex0 = hashTable[h0]; - U32 const matchIndex1 = hashTable[h1]; - BYTE const* repMatch = ip2 - offset_1; - const BYTE* match0 = base + matchIndex0; - const BYTE* match1 = base + matchIndex1; - U32 offcode; - -#if defined(__aarch64__) - PREFETCH_L1(ip0+256); -#endif - - hashTable[h0] = current0; /* update hash table */ - hashTable[h1] = current1; /* update hash table */ - - assert(ip0 + 1 == ip1); - - if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { - mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; - ip0 = ip2 - mLength; - match0 = repMatch - mLength; - mLength += 4; - offcode = 0; - goto _match; - } - if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { - /* found a regular match */ - goto _offset; - } - if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { - /* found a regular match after one literal */ - ip0 = ip1; - match0 = match1; - goto _offset; - } - { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; - assert(step >= 2); - ip0 += step; - ip1 += step; - continue; - } -_offset: /* Requires: ip0, match0 */ - /* Compute the offset code */ - offset_2 = offset_1; - offset_1 = (U32)(ip0-match0); - offcode = offset_1 + ZSTD_REP_MOVE; - mLength = 4; - /* Count the backwards match length */ - while (((ip0>anchor) & (match0>prefixStart)) - && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ - -_match: /* Requires: ip0, match0, offcode */ - /* Count the forward length */ - mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); - ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); - /* match found */ - ip0 += mLength; - anchor = ip0; - - if (ip0 <= ilimit) { - /* Fill Table */ - assert(base+current0+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); - - if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ - while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; - { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); - ip0 += rLength; - ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); - anchor = ip0; - continue; /* faster when present (confirmed on gcc-8) ... (?) */ - } } } - ip1 = ip0 + 1; - } - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - assert(ms->dictMatchState == NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_fast_dictMatchState_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - HASH_READ_SIZE; - U32 offset_1=rep[0], offset_2=rep[1]; - U32 offsetSaved = 0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; - const U32* const dictHashTable = dms->hashTable; - const U32 dictStartIndex = dms->window.dictLimit; - const BYTE* const dictBase = dms->window.base; - const BYTE* const dictStart = dictBase + dictStartIndex; - const BYTE* const dictEnd = dms->window.nextSrc; - const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); - const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); - const U32 dictHLog = dictCParams->hashLog; - - /* if a dictionary is still attached, it necessarily means that - * it is within window size. So we just check it. */ - const U32 maxDistance = 1U << cParams->windowLog; - const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); - assert(endIndex - prefixStartIndex <= maxDistance); - (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ - - /* ensure there will be no no underflow - * when translating a dict index into a local index */ - assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); - ip += (dictAndPrefixLength == 0); - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hlog, mls); - U32 const current = (U32)(ip-base); - U32 const matchIndex = hashTable[h]; - const BYTE* match = base + matchIndex; - const U32 repIndex = current + 1 - offset_1; - const BYTE* repMatch = (repIndex < prefixStartIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - hashTable[h] = current; /* update hash table */ - - if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); - } else if ( (matchIndex <= prefixStartIndex) ) { - size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); - U32 const dictMatchIndex = dictHashTable[dictHash]; - const BYTE* dictMatch = dictBase + dictMatchIndex; - if (dictMatchIndex <= dictStartIndex || - MEM_read32(dictMatch) != MEM_read32(ip)) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a dict match */ - U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta); - mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; - while (((ip>anchor) & (dictMatch>dictStart)) - && (ip[-1] == dictMatch[-1])) { - ip--; dictMatch--; mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - } else if (MEM_read32(match) != MEM_read32(ip)) { - /* it's not a match, and we're not going to check the dictionary */ - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } else { - /* found a regular match */ - U32 const offset = (U32)(ip-match); - mLength = ZSTD_count(ip+4, match+4, iend) + 4; - while (((ip>anchor) & (match>prefixStart)) - && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - assert(base+current+2 > istart); /* check base overflow */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? - dictBase - dictIndexDelta + repIndex2 : - base + repIndex2; - if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } - - /* save reps for next block */ - rep[0] = offset_1 ? offset_1 : offsetSaved; - rep[1] = offset_2 ? offset_2 : offsetSaved; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - -size_t ZSTD_compressBlock_fast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - assert(ms->dictMatchState != NULL); - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - - -static size_t ZSTD_compressBlock_fast_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize, U32 const mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hlog = cParams->hashLog; - /* support stepSize of 0 */ - U32 const stepSize = cParams->targetLength + !(cParams->targetLength); - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); - const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); - const U32 dictStartIndex = lowLimit; - const BYTE* const dictStart = dictBase + dictStartIndex; - const U32 dictLimit = ms->window.dictLimit; - const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; - const BYTE* const prefixStart = base + prefixStartIndex; - const BYTE* const dictEnd = dictBase + prefixStartIndex; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - U32 offset_1=rep[0], offset_2=rep[1]; - - DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); - - /* switch to "regular" variant if extDict is invalidated due to maxDistance */ - if (prefixStartIndex == dictStartIndex) - return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hlog, mls); - const U32 matchIndex = hashTable[h]; - const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; - const BYTE* match = matchBase + matchIndex; - const U32 current = (U32)(ip-base); - const U32 repIndex = current + 1 - offset_1; - const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - hashTable[h] = current; /* update hash table */ - DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); - assert(offset_1 <= current +1); /* check repIndex */ - - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; - ip++; - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); - ip += rLength; - anchor = ip; - } else { - if ( (matchIndex < dictStartIndex) || - (MEM_read32(match) != MEM_read32(ip)) ) { - assert(stepSize >= 1); - ip += ((ip-anchor) >> kSearchStrength) + stepSize; - continue; - } - { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; - U32 const offset = current - matchIndex; - size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; - while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = offset; /* update offset history */ - ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); - ip += mLength; - anchor = ip; - } } - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; - hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex2 = current2 - offset_2; - const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ - && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; - size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); - hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } } } - - /* save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_fast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - U32 const mls = ms->cParams.minMatch; - switch(mls) - { - default: /* includes case 3 */ - case 4 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); - case 5 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); - case 6 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); - case 7 : - return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); - } -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp b/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp deleted file mode 100644 index af2d3b703..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +++ /dev/null @@ -1,1142 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include "zstd/compress/zstd_compress_internal.h" -#include "zstd/compress/zstd_lazy.h" - - -/*-************************************* -* Binary Tree search -***************************************/ - -namespace duckdb_zstd { - -static void -ZSTD_updateDUBT(ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iend, - U32 mls) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - if (idx != target) - DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", - idx, target, ms->window.dictLimit); - assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ - (void)iend; - - assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ - for ( ; idx < target ; idx++) { - size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ - U32 const matchIndex = hashTable[h]; - - U32* const nextCandidatePtr = bt + 2*(idx&btMask); - U32* const sortMarkPtr = nextCandidatePtr + 1; - - DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); - hashTable[h] = idx; /* Update Hash Table */ - *nextCandidatePtr = matchIndex; /* update BT like a chain */ - *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; - } - ms->nextToUpdate = target; -} - - -/** ZSTD_insertDUBT1() : - * sort one already inserted but unsorted position - * assumption : current >= btlow == (current - btmask) - * doesn't fail */ -static void -ZSTD_insertDUBT1(ZSTD_matchState_t* ms, - U32 current, const BYTE* inputEnd, - U32 nbCompares, U32 btLow, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const ip = (current>=dictLimit) ? base + current : dictBase + current; - const BYTE* const iend = (current>=dictLimit) ? inputEnd : dictBase + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ - U32 dummy32; /* to be nullified at the end */ - U32 const windowValid = ms->window.lowLimit; - U32 const maxDistance = 1U << cParams->windowLog; - U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; - - - DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", - current, dictLimit, windowLow); - assert(current >= btLow); - assert(ip < iend); /* condition for ZSTD_count */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); - /* note : all candidates are now supposed sorted, - * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK - * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ - - if ( (dictMode != ZSTD_extDict) - || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ - || (current < dictLimit) /* both in extDict */) { - const BYTE* const mBase = ( (dictMode != ZSTD_extDict) - || (matchIndex+matchLength >= dictLimit)) ? - base : dictBase; - assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ - || (current < dictLimit) ); - match = mBase + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* preparation for next read of match[matchLength] */ - } - - DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", - current, matchIndex, (U32)matchLength); - - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ - } - - if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", - matchIndex, btLow, nextPtr[1]); - smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", - matchIndex, btLow, nextPtr[0]); - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; -} - - -static size_t -ZSTD_DUBT_findBetterDictMatch ( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - size_t bestLength, - U32 nbCompares, - U32 const mls, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_matchState_t * const dms = ms->dictMatchState; - const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; - const U32 * const dictHashTable = dms->hashTable; - U32 const hashLog = dmsCParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 dictMatchIndex = dictHashTable[h]; - - const BYTE* const base = ms->window.base; - const BYTE* const prefixStart = base + ms->window.dictLimit; - U32 const current = (U32)(ip-base); - const BYTE* const dictBase = dms->window.base; - const BYTE* const dictEnd = dms->window.nextSrc; - U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); - U32 const dictLowLimit = dms->window.lowLimit; - U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; - - U32* const dictBt = dms->chainTable; - U32 const btLog = dmsCParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; - - size_t commonLengthSmaller=0, commonLengthLarger=0; - - (void)dictMode; - assert(dictMode == ZSTD_dictMatchState); - - while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { - U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match = dictBase + dictMatchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (dictMatchIndex+matchLength >= dictHighLimit) - match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ - - if (matchLength > bestLength) { - U32 matchIndex = dictMatchIndex + dictIndexDelta; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { - DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", - current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex); - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - } - if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } - - if (match[matchLength] < ip[matchLength]) { - if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ - commonLengthLarger = matchLength; - dictMatchIndex = nextPtr[0]; - } - } - - if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); - } - return bestLength; - -} - - -static size_t -ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - size_t* offsetPtr, - U32 const mls, - const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 matchIndex = hashTable[h]; - - const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); - - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const unsortLimit = MAX(btLow, windowLow); - - U32* nextCandidate = bt + 2*(matchIndex&btMask); - U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; - U32 nbCompares = 1U << cParams->searchLog; - U32 nbCandidates = nbCompares; - U32 previousCandidate = 0; - - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", current); - assert(ip <= iend-8); /* required for h calculation */ - - /* reach end of unsorted candidates list */ - while ( (matchIndex > unsortLimit) - && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) - && (nbCandidates > 1) ) { - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", - matchIndex); - *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ - previousCandidate = matchIndex; - matchIndex = *nextCandidate; - nextCandidate = bt + 2*(matchIndex&btMask); - unsortedMark = bt + 2*(matchIndex&btMask) + 1; - nbCandidates --; - } - - /* nullify last candidate if it's still unsorted - * simplification, detrimental to compression ratio, beneficial for speed */ - if ( (matchIndex > unsortLimit) - && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { - DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", - matchIndex); - *nextCandidate = *unsortedMark = 0; - } - - /* batch sort stacked candidates */ - matchIndex = previousCandidate; - while (matchIndex) { /* will end on matchIndex == 0 */ - U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; - U32 const nextCandidateIdx = *nextCandidateIdxPtr; - ZSTD_insertDUBT1(ms, matchIndex, iend, - nbCandidates, unsortLimit, dictMode); - matchIndex = nextCandidateIdx; - nbCandidates++; - } - - /* find longest match */ - { size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current + 8 + 1; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - matchIndex = hashTable[h]; - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match; - - if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { - match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex; - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - if (dictMode == ZSTD_dictMatchState) { - nbCompares = 0; /* in addition to avoiding checking any - * further in this loop, make sure we - * skip checking in the dictionary. */ - } - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - - if (dictMode == ZSTD_dictMatchState && nbCompares) { - bestLength = ZSTD_DUBT_findBetterDictMatch( - ms, ip, iend, - offsetPtr, bestLength, nbCompares, - mls, dictMode); - } - - assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */ - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - if (bestLength >= MINMATCH) { - U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; - DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", - current, (U32)bestLength, (U32)*offsetPtr, mIndex); - } - return bestLength; - } -} - - -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -FORCE_INLINE_TEMPLATE size_t -ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls /* template */, - const ZSTD_dictMode_e dictMode) -{ - DEBUGLOG(7, "ZSTD_BtFindBestMatch"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateDUBT(ms, ip, iLimit, mls); - return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); -} - - -static size_t -ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} - - -static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); - } -} - - -static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} - - - -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndex_internal( - ZSTD_matchState_t* ms, - const ZSTD_compressionParameters* const cParams, - const BYTE* ip, U32 const mls) -{ - U32* const hashTable = ms->hashTable; - const U32 hashLog = cParams->hashLog; - U32* const chainTable = ms->chainTable; - const U32 chainMask = (1 << cParams->chainLog) - 1; - const BYTE* const base = ms->window.base; - const U32 target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - - while(idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - ms->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { - const ZSTD_compressionParameters* const cParams = &ms->cParams; - return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); -} - - -/* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE_TEMPLATE -size_t ZSTD_HcFindBestMatch_generic ( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iLimit, - size_t* offsetPtr, - const U32 mls, const ZSTD_dictMode_e dictMode) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const chainTable = ms->chainTable; - const U32 chainSize = (1 << cParams->chainLog); - const U32 chainMask = chainSize-1; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const U32 current = (U32)(ip-base); - const U32 maxDistance = 1U << cParams->windowLog; - const U32 lowestValid = ms->window.lowLimit; - const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - const U32 isDictionary = (ms->loadedDictEnd != 0); - const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; - const U32 minChain = current > chainSize ? current - chainSize : 0; - U32 nbAttempts = 1U << cParams->searchLog; - size_t ml=4-1; - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); - - for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { - const BYTE* const match = base + matchIndex; - assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ - if (match[ml] == ip[ml]) /* potentially better */ - currentMl = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex; - assert(match+4 <= dictEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; - } - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - - if (matchIndex <= minChain) break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - if (dictMode == ZSTD_dictMatchState) { - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32* const dmsChainTable = dms->chainTable; - const U32 dmsChainSize = (1 << dms->cParams.chainLog); - const U32 dmsChainMask = dmsChainSize - 1; - const U32 dmsLowestIndex = dms->window.dictLimit; - const BYTE* const dmsBase = dms->window.base; - const BYTE* const dmsEnd = dms->window.nextSrc; - const U32 dmsSize = (U32)(dmsEnd - dmsBase); - const U32 dmsIndexDelta = dictLimit - dmsSize; - const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; - - matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; - - for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { - size_t currentMl=0; - const BYTE* const match = dmsBase + matchIndex; - assert(match+4 <= dmsEnd); - if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; - - /* save best solution */ - if (currentMl > ml) { - ml = currentMl; - *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; - if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ - } - - if (matchIndex <= dmsMinChain) break; - matchIndex = dmsChainTable[matchIndex & dmsChainMask]; - } - } - - return ml; -} - - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); - } -} - - -static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); - } -} - - -FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* const iLimit, - size_t* offsetPtr) -{ - switch(ms->cParams.minMatch) - { - default : /* includes case 3 */ - case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); - case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); - case 7 : - case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); - } -} - - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -typedef enum { search_hashChain, search_binaryTree } searchMethod_e; - -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_lazy_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth, - ZSTD_dictMode_e const dictMode) -{ - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 prefixLowestIndex = ms->window.dictLimit; - const BYTE* const prefixLowest = base + prefixLowestIndex; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS - : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS - : ZSTD_HcFindBestMatch_selectMLS); - U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; - - const ZSTD_matchState_t* const dms = ms->dictMatchState; - const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ? - dms->window.dictLimit : 0; - const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? - dms->window.base : NULL; - const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ? - dictBase + dictLowestIndex : NULL; - const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? - dms->window.nextSrc : NULL; - const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? - prefixLowestIndex - (U32)(dictEnd - dictBase) : - 0; - const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); - - /* init */ - ip += (dictAndPrefixLength == 0); - if (dictMode == ZSTD_noDict) { - U32 const current = (U32)(ip - base); - U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog); - U32 const maxRep = current - windowLow; - if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; - } - if (dictMode == ZSTD_dictMatchState) { - /* dictMatchState repCode checks don't currently handle repCode == 0 - * disabling. */ - assert(offset_1 <= dictAndPrefixLength); - assert(offset_2 <= dictAndPrefixLength); - } - - /* Match Loop */ -#if defined(__GNUC__) && defined(__x86_64__) - /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the - * code alignment is perturbed. To fix the instability align the loop on 32-bytes. - */ - __asm__(".p2align 5"); -#endif - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - - /* check repCode */ - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) + 1 - offset_1; - const BYTE* repMatch = (dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex) ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - if (depth==0) goto _storeSequence; - } - } - if ( dictMode == ZSTD_noDict - && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { - matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; - if (depth==0) goto _storeSequence; - } - - /* first search (depth 0) */ - { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < 4) { - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - } - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; - int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - if (dictMode == ZSTD_dictMatchState) { - const U32 repIndex = (U32)(ip - base) - offset_1; - const BYTE* repMatch = repIndex < prefixLowestIndex ? - dictBase + (repIndex - dictIndexDelta) : - base + repIndex; - if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; - size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; - int const gain2 = (int)(mlRep * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((mlRep >= 4) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - } - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. - */ - /* catch up */ - if (offset) { - if (dictMode == ZSTD_noDict) { - while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) - && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ - { start--; matchLength++; } - } - if (dictMode == ZSTD_dictMatchState) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; - const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - } - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - if (dictMode == ZSTD_dictMatchState) { - while (ip <= ilimit) { - U32 const current2 = (U32)(ip-base); - U32 const repIndex = current2 - offset_2; - const BYTE* repMatch = dictMode == ZSTD_dictMatchState - && repIndex < prefixLowestIndex ? - dictBase - dictIndexDelta + repIndex : - base + repIndex; - if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) - && (MEM_read32(repMatch) == MEM_read32(ip)) ) { - const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; - } - break; - } - } - - if (dictMode == ZSTD_noDict) { - while ( ((ip <= ilimit) & (offset_2>0)) - && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { - /* store sequence */ - matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } } } - - /* Save reps for next block */ - rep[0] = offset_1 ? offset_1 : savedOffset; - rep[1] = offset_2 ? offset_2 : savedOffset; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); -} - - -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_lazy_extDict_generic( - ZSTD_matchState_t* ms, seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const searchMethod_e searchMethod, const U32 depth) -{ - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* const dictBase = ms->window.dictBase; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const dictStart = dictBase + ms->window.lowLimit; - const U32 windowLog = ms->cParams.windowLog; - - typedef size_t (*searchMax_f)( - ZSTD_matchState_t* ms, - const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = rep[0], offset_2 = rep[1]; - - DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); - - /* init */ - ip += (ip == prefixStart); - - /* Match Loop */ -#if defined(__GNUC__) && defined(__x86_64__) - /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the - * code alignment is perturbed. To fix the instability align the loop on 32-bytes. - */ - __asm__(".p2align 5"); -#endif - while (ip < ilimit) { - size_t matchLength=0; - size_t offset=0; - const BYTE* start=ip+1; - U32 current = (U32)(ip-base); - - /* check repCode */ - { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog); - const U32 repIndex = (U32)(current+1 - offset_1); - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip+1) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; - if (depth==0) goto _storeSequence; - } } - - /* first search (depth 0) */ - { size_t offsetFound = 999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset=offsetFound; - } - - if (matchLength < 4) { - ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth>=1) - while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 1 */ - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } } - - /* let's find an even better one */ - if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - int const gain2 = (int)(repLength * 4); - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); - if ((repLength >= 4) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } } - - /* search match, depth 2 */ - { size_t offset2=999999999; - size_t const ml2 = searchMax(ms, ip, iend, &offset2); - int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ - int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); - if ((ml2 >= 4) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } } } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); - const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ - offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repCurrent = (U32)(ip-base); - const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); - const U32 repIndex = repCurrent - offset_2; - const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ - if (MEM_read32(ip) == MEM_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; - offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; - } } - - /* Save reps for next block */ - rep[0] = offset_1; - rep[1] = offset_2; - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); -} - -size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); -} - -size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); -} - -size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) - -{ - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp b/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp deleted file mode 100644 index ee2480bfb..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +++ /dev/null @@ -1,623 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include "zstd/compress/zstd_ldm.h" - -#include "zstd/common/debug.h" -#include "zstd/compress/zstd_fast.h" /* ZSTD_fillHashTable() */ -#include "zstd/compress/zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ - -#define LDM_BUCKET_SIZE_LOG 3 -#define LDM_MIN_MATCH_LENGTH 64 -#define LDM_HASH_RLOG 7 -#define LDM_HASH_CHAR_OFFSET 10 - -namespace duckdb_zstd { - -void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams) -{ - params->windowLog = cParams->windowLog; - ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); - DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); - if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; - if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; - if (cParams->strategy >= ZSTD_btopt) { - /* Get out of the way of the optimal parser */ - U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength); - assert(minMatch >= ZSTD_LDM_MINMATCH_MIN); - assert(minMatch <= ZSTD_LDM_MINMATCH_MAX); - params->minMatchLength = minMatch; - } - if (params->hashLog == 0) { - params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); - assert(params->hashLog <= ZSTD_HASHLOG_MAX); - } - if (params->hashRateLog == 0) { - params->hashRateLog = params->windowLog < params->hashLog - ? 0 - : params->windowLog - params->hashLog; - } - params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); -} - -size_t ZSTD_ldm_getTableSize(ldmParams_t params) -{ - size_t const ldmHSize = ((size_t)1) << params.hashLog; - size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); - size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); - size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) - + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); - return params.enableLdm ? totalSize : 0; -} - -size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) -{ - return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; -} - -/** ZSTD_ldm_getSmallHash() : - * numBits should be <= 32 - * If numBits==0, returns 0. - * @return : the most significant numBits of value. */ -static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits) -{ - assert(numBits <= 32); - return numBits == 0 ? 0 : (U32)(value >> (64 - numBits)); -} - -/** ZSTD_ldm_getChecksum() : - * numBitsToDiscard should be <= 32 - * @return : the next most significant 32 bits after numBitsToDiscard */ -static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard) -{ - assert(numBitsToDiscard <= 32); - return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF; -} - -/** ZSTD_ldm_getTag() ; - * Given the hash, returns the most significant numTagBits bits - * after (32 + hbits) bits. - * - * If there are not enough bits remaining, return the last - * numTagBits bits. */ -static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits) -{ - assert(numTagBits < 32 && hbits <= 32); - if (32 - hbits < numTagBits) { - return hash & (((U32)1 << numTagBits) - 1); - } else { - return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1); - } -} - -/** ZSTD_ldm_getBucket() : - * Returns a pointer to the start of the bucket associated with hash. */ -static ldmEntry_t* ZSTD_ldm_getBucket( - ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) -{ - return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); -} - -/** ZSTD_ldm_insertEntry() : - * Insert the entry with corresponding hash into the hash table */ -static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, - size_t const hash, const ldmEntry_t entry, - ldmParams_t const ldmParams) -{ - BYTE* const bucketOffsets = ldmState->bucketOffsets; - *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry; - bucketOffsets[hash]++; - bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1; -} - -/** ZSTD_ldm_makeEntryAndInsertByTag() : - * - * Gets the small hash, checksum, and tag from the rollingHash. - * - * If the tag matches (1 << ldmParams.hashRateLog)-1, then - * creates an ldmEntry from the offset, and inserts it into the hash table. - * - * hBits is the length of the small hash, which is the most significant hBits - * of rollingHash. The checksum is the next 32 most significant bits, followed - * by ldmParams.hashRateLog bits that make up the tag. */ -static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState, - U64 const rollingHash, - U32 const hBits, - U32 const offset, - ldmParams_t const ldmParams) -{ - U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog); - U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1; - if (tag == tagMask) { - U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits); - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - ldmEntry_t entry; - entry.offset = offset; - entry.checksum = checksum; - ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams); - } -} - -/** ZSTD_ldm_countBackwardsMatch() : - * Returns the number of bytes that match backwards before pIn and pMatch. - * - * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ -static size_t ZSTD_ldm_countBackwardsMatch( - const BYTE* pIn, const BYTE* pAnchor, - const BYTE* pMatch, const BYTE* pBase) -{ - size_t matchLength = 0; - while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) { - pIn--; - pMatch--; - matchLength++; - } - return matchLength; -} - -/** ZSTD_ldm_fillFastTables() : - * - * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. - * This is similar to ZSTD_loadDictionaryContent. - * - * The tables for the other strategies are filled within their - * block compressors. */ -static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, - void const* end) -{ - const BYTE* const iend = (const BYTE*)end; - - switch(ms->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); - break; - - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); - break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - case ZSTD_btlazy2: - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - break; - default: - assert(0); /* not possible : not a valid strategy id */ - } - - return 0; -} - -/** ZSTD_ldm_fillLdmHashTable() : - * - * Fills hashTable from (lastHashed + 1) to iend (non-inclusive). - * lastHash is the rolling hash that corresponds to lastHashed. - * - * Returns the rolling hash corresponding to position iend-1. */ -static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state, - U64 lastHash, const BYTE* lastHashed, - const BYTE* iend, const BYTE* base, - U32 hBits, ldmParams_t const ldmParams) -{ - U64 rollingHash = lastHash; - const BYTE* cur = lastHashed + 1; - - while (cur < iend) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1], - cur[ldmParams.minMatchLength-1], - state->hashPower); - ZSTD_ldm_makeEntryAndInsertByTag(state, - rollingHash, hBits, - (U32)(cur - base), ldmParams); - ++cur; - } - return rollingHash; -} - -void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, - const BYTE* iend, ldmParams_t const* params) -{ - DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); - if ((size_t)(iend - ip) >= params->minMatchLength) { - U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength); - ZSTD_ldm_fillLdmHashTable( - state, startingHash, ip, iend - params->minMatchLength, state->window.base, - params->hashLog - params->bucketSizeLog, - *params); - } -} - - -/** ZSTD_ldm_limitTableUpdate() : - * - * Sets cctx->nextToUpdate to a position corresponding closer to anchor - * if it is far way - * (after a long match, only update tables a limited amount). */ -static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) -{ - U32 const current = (U32)(anchor - ms->window.base); - if (current > ms->nextToUpdate + 1024) { - ms->nextToUpdate = - current - MIN(512, current - ms->nextToUpdate - 1024); - } -} - -static size_t ZSTD_ldm_generateSequences_internal( - ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, - ldmParams_t const* params, void const* src, size_t srcSize) -{ - /* LDM parameters */ - int const extDict = ZSTD_window_hasExtDict(ldmState->window); - U32 const minMatchLength = params->minMatchLength; - U64 const hashPower = ldmState->hashPower; - U32 const hBits = params->hashLog - params->bucketSizeLog; - U32 const ldmBucketSize = 1U << params->bucketSizeLog; - U32 const hashRateLog = params->hashRateLog; - U32 const ldmTagMask = (1U << params->hashRateLog) - 1; - /* Prefix and extDict parameters */ - U32 const dictLimit = ldmState->window.dictLimit; - U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; - BYTE const* const base = ldmState->window.base; - BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; - BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; - BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; - BYTE const* const lowPrefixPtr = base + dictLimit; - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE); - /* Input positions */ - BYTE const* anchor = istart; - BYTE const* ip = istart; - /* Rolling hash */ - BYTE const* lastHashed = NULL; - U64 rollingHash = 0; - - while (ip <= ilimit) { - size_t mLength; - U32 const current = (U32)(ip - base); - size_t forwardMatchLength = 0, backwardMatchLength = 0; - ldmEntry_t* bestEntry = NULL; - if (ip != istart) { - rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0], - lastHashed[minMatchLength], - hashPower); - } else { - rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength); - } - lastHashed = ip; - - /* Do not insert and do not look for a match */ - if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) { - ip++; - continue; - } - - /* Get the best entry and compute the match lengths */ - { - ldmEntry_t* const bucket = - ZSTD_ldm_getBucket(ldmState, - ZSTD_ldm_getSmallHash(rollingHash, hBits), - *params); - ldmEntry_t* cur; - size_t bestMatchLength = 0; - U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits); - - for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) { - size_t curForwardMatchLength, curBackwardMatchLength, - curTotalMatchLength; - if (cur->checksum != checksum || cur->offset <= lowestIndex) { - continue; - } - if (extDict) { - BYTE const* const curMatchBase = - cur->offset < dictLimit ? dictBase : base; - BYTE const* const pMatch = curMatchBase + cur->offset; - BYTE const* const matchEnd = - cur->offset < dictLimit ? dictEnd : iend; - BYTE const* const lowMatchPtr = - cur->offset < dictLimit ? dictStart : lowPrefixPtr; - - curForwardMatchLength = ZSTD_count_2segments( - ip, pMatch, iend, - matchEnd, lowPrefixPtr); - if (curForwardMatchLength < minMatchLength) { - continue; - } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowMatchPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; - } else { /* !extDict */ - BYTE const* const pMatch = base + cur->offset; - curForwardMatchLength = ZSTD_count(ip, pMatch, iend); - if (curForwardMatchLength < minMatchLength) { - continue; - } - curBackwardMatchLength = - ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch, - lowPrefixPtr); - curTotalMatchLength = curForwardMatchLength + - curBackwardMatchLength; - } - - if (curTotalMatchLength > bestMatchLength) { - bestMatchLength = curTotalMatchLength; - forwardMatchLength = curForwardMatchLength; - backwardMatchLength = curBackwardMatchLength; - bestEntry = cur; - } - } - } - - /* No match found -- continue searching */ - if (bestEntry == NULL) { - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, - hBits, current, - *params); - ip++; - continue; - } - - /* Match found */ - mLength = forwardMatchLength + backwardMatchLength; - ip -= backwardMatchLength; - - { - /* Store the sequence: - * ip = current - backwardMatchLength - * The match is at (bestEntry->offset - backwardMatchLength) - */ - U32 const matchIndex = bestEntry->offset; - U32 const offset = current - matchIndex; - rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; - - /* Out of sequence storage */ - if (rawSeqStore->size == rawSeqStore->capacity) - return ERROR(dstSize_tooSmall); - seq->litLength = (U32)(ip - anchor); - seq->matchLength = (U32)mLength; - seq->offset = offset; - rawSeqStore->size++; - } - - /* Insert the current entry into the hash table */ - ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits, - (U32)(lastHashed - base), - *params); - - assert(ip + backwardMatchLength == lastHashed); - - /* Fill the hash table from lastHashed+1 to ip+mLength*/ - /* Heuristic: don't need to fill the entire table at end of block */ - if (ip + mLength <= ilimit) { - rollingHash = ZSTD_ldm_fillLdmHashTable( - ldmState, rollingHash, lastHashed, - ip + mLength, base, hBits, *params); - lastHashed = ip + mLength - 1; - } - ip += mLength; - anchor = ip; - } - return iend - anchor; -} - -/*! ZSTD_ldm_reduceTable() : - * reduce table indexes by `reducerValue` */ -static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, - U32 const reducerValue) -{ - U32 u; - for (u = 0; u < size; u++) { - if (table[u].offset < reducerValue) table[u].offset = 0; - else table[u].offset -= reducerValue; - } -} - -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldmState, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize) -{ - U32 const maxDist = 1U << params->windowLog; - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - size_t const kMaxChunkSize = 1 << 20; - size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); - size_t chunk; - size_t leftoverSize = 0; - - assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); - /* Check that ZSTD_window_update() has been called for this chunk prior - * to passing it to this function. - */ - assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); - /* The input could be very large (in zstdmt), so it must be broken up into - * chunks to enforce the maximum distance and handle overflow correction. - */ - assert(sequences->pos <= sequences->size); - assert(sequences->size <= sequences->capacity); - for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { - BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; - size_t const remaining = (size_t)(iend - chunkStart); - BYTE const *const chunkEnd = - (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; - size_t const chunkSize = chunkEnd - chunkStart; - size_t newLeftoverSize; - size_t const prevSize = sequences->size; - - assert(chunkStart < iend); - /* 1. Perform overflow correction if necessary. */ - if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { - U32 const ldmHSize = 1U << params->hashLog; - U32 const correction = ZSTD_window_correctOverflow( - &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); - ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); - /* invalidate dictionaries on overflow correction */ - ldmState->loadedDictEnd = 0; - } - /* 2. We enforce the maximum offset allowed. - * - * kMaxChunkSize should be small enough that we don't lose too much of - * the window through early invalidation. - * TODO: * Test the chunk size. - * * Try invalidation after the sequence generation and test the - * the offset against maxDist directly. - * - * NOTE: Because of dictionaries + sequence splitting we MUST make sure - * that any offset used is valid at the END of the sequence, since it may - * be split into two sequences. This condition holds when using - * ZSTD_window_enforceMaxDist(), but if we move to checking offsets - * against maxDist directly, we'll have to carefully handle that case. - */ - ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); - /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ - newLeftoverSize = ZSTD_ldm_generateSequences_internal( - ldmState, sequences, params, chunkStart, chunkSize); - if (ZSTD_isError(newLeftoverSize)) - return newLeftoverSize; - /* 4. We add the leftover literals from previous iterations to the first - * newly generated sequence, or add the `newLeftoverSize` if none are - * generated. - */ - /* Prepend the leftover literals from the last call */ - if (prevSize < sequences->size) { - sequences->seq[prevSize].litLength += (U32)leftoverSize; - leftoverSize = newLeftoverSize; - } else { - assert(newLeftoverSize == chunkSize); - leftoverSize += chunkSize; - } - } - return 0; -} - -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { - while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { - rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; - if (srcSize <= seq->litLength) { - /* Skip past srcSize literals */ - seq->litLength -= (U32)srcSize; - return; - } - srcSize -= seq->litLength; - seq->litLength = 0; - if (srcSize < seq->matchLength) { - /* Skip past the first srcSize of the match */ - seq->matchLength -= (U32)srcSize; - if (seq->matchLength < minMatch) { - /* The match is too short, omit it */ - if (rawSeqStore->pos + 1 < rawSeqStore->size) { - seq[1].litLength += seq[0].matchLength; - } - rawSeqStore->pos++; - } - return; - } - srcSize -= seq->matchLength; - seq->matchLength = 0; - rawSeqStore->pos++; - } -} - -/** - * If the sequence length is longer than remaining then the sequence is split - * between this block and the next. - * - * Returns the current sequence to handle, or if the rest of the block should - * be literals, it returns a sequence with offset == 0. - */ -static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, - U32 const remaining, U32 const minMatch) -{ - rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; - assert(sequence.offset > 0); - /* Likely: No partial sequence */ - if (remaining >= sequence.litLength + sequence.matchLength) { - rawSeqStore->pos++; - return sequence; - } - /* Cut the sequence short (offset == 0 ==> rest is literals). */ - if (remaining <= sequence.litLength) { - sequence.offset = 0; - } else if (remaining < sequence.litLength + sequence.matchLength) { - sequence.matchLength = remaining - sequence.litLength; - if (sequence.matchLength < minMatch) { - sequence.offset = 0; - } - } - /* Skip past `remaining` bytes for the future sequences. */ - ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); - return sequence; -} - -size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - unsigned const minMatch = cParams->minMatch; - ZSTD_blockCompressor const blockCompressor = - ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); - /* Input bounds */ - BYTE const* const istart = (BYTE const*)src; - BYTE const* const iend = istart + srcSize; - /* Input positions */ - BYTE const* ip = istart; - - DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); - assert(rawSeqStore->pos <= rawSeqStore->size); - assert(rawSeqStore->size <= rawSeqStore->capacity); - /* Loop through each sequence and apply the block compressor to the lits */ - while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { - /* maybeSplitSequence updates rawSeqStore->pos */ - rawSeq const sequence = maybeSplitSequence(rawSeqStore, - (U32)(iend - ip), minMatch); - int i; - /* End signal */ - if (sequence.offset == 0) - break; - - assert(ip + sequence.litLength + sequence.matchLength <= iend); - - /* Fill tables for block compressor */ - ZSTD_ldm_limitTableUpdate(ms, ip); - ZSTD_ldm_fillFastTables(ms, ip); - /* Run the block compressor */ - DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); - { - size_t const newLitLength = - blockCompressor(ms, seqStore, rep, ip, sequence.litLength); - ip += sequence.litLength; - /* Update the repcodes */ - for (i = ZSTD_REP_NUM - 1; i > 0; i--) - rep[i] = rep[i-1]; - rep[0] = sequence.offset; - /* Store the sequence */ - ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, - sequence.offset + ZSTD_REP_MOVE, - sequence.matchLength - MINMATCH); - ip += sequence.matchLength; - } - } - /* Fill the tables for the block compressor */ - ZSTD_ldm_limitTableUpdate(ms, ip); - ZSTD_ldm_fillFastTables(ms, ip); - /* Compress the last literals */ - return blockCompressor(ms, seqStore, rep, ip, iend - ip); -} - -} diff --git a/src/duckdb/third_party/zstd/compress/zstd_opt.cpp b/src/duckdb/third_party/zstd/compress/zstd_opt.cpp deleted file mode 100644 index 09e9bff21..000000000 --- a/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +++ /dev/null @@ -1,1204 +0,0 @@ -/* - * Copyright (c) 2016-2020, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#include "zstd/compress/zstd_compress_internal.h" -#include "zstd/compress/hist.h" -#include "zstd/compress/zstd_opt.h" - - -#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ -#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ -#define ZSTD_MAX_PRICE (1<<30) - -#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ - - -/*-************************************* -* Price functions for optimal parser -***************************************/ - -#if 0 /* approximation at bit level */ -# define BITCOST_ACCURACY 0 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) -#elif 0 /* fractional bit accuracy */ -# define BITCOST_ACCURACY 8 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) -#else /* opt==approx, ultra==accurate */ -# define BITCOST_ACCURACY 8 -# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) -# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) -#endif - -namespace duckdb_zstd { - -MEM_STATIC U32 ZSTD_bitWeight(U32 stat) -{ - return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); -} - -MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) -{ - U32 const stat = rawStat + 1; - U32 const hb = ZSTD_highbit32(stat); - U32 const BWeight = hb * BITCOST_MULTIPLIER; - U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + BITCOST_ACCURACY < 31); - return weight; -} - -#if (DEBUGLEVEL>=2) -/* debugging function, - * @return price in bytes as fractional value - * for debug messages only */ -MEM_STATIC double ZSTD_fCost(U32 price) -{ - return (double)price / (BITCOST_MULTIPLIER*8); -} -#endif - -static int ZSTD_compressedLiterals(optState_t const* const optPtr) -{ - return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; -} - -static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) -{ - if (ZSTD_compressedLiterals(optPtr)) - optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); - optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); - optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); - optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); -} - - -/* ZSTD_downscaleStat() : - * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) - * return the resulting sum of elements */ -static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) -{ - U32 s, sum=0; - DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); - assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); - for (s=0; s> (ZSTD_FREQ_DIV+malus)); - sum += table[s]; - } - return sum; -} - -/* ZSTD_rescaleFreqs() : - * if first block (detected by optPtr->litLengthSum == 0) : init statistics - * take hints from dictionary if there is one - * or init from zero, using src for literals stats, or flat 1 for match symbols - * otherwise downscale existing stats, to be used as seed for next block. - */ -static void -ZSTD_rescaleFreqs(optState_t* const optPtr, - const BYTE* const src, size_t const srcSize, - int const optLevel) -{ - int const compressedLiterals = ZSTD_compressedLiterals(optPtr); - DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); - optPtr->priceType = zop_dynamic; - - if (optPtr->litLengthSum == 0) { /* first block : init */ - if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ - DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); - optPtr->priceType = zop_predef; - } - - assert(optPtr->symbolCosts != NULL); - if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { - /* huffman table presumed generated by dictionary */ - optPtr->priceType = zop_dynamic; - - if (compressedLiterals) { - unsigned lit; - assert(optPtr->litFreq != NULL); - optPtr->litSum = 0; - for (lit=0; lit<=MaxLit; lit++) { - U32 const scaleLog = 11; /* scale to 2K */ - U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); - assert(bitCost <= scaleLog); - optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->litSum += optPtr->litFreq[lit]; - } } - - { unsigned ll; - FSE_CState_t llstate; - FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); - optPtr->litLengthSum = 0; - for (ll=0; ll<=MaxLL; ll++) { - U32 const scaleLog = 10; /* scale to 1K */ - U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); - assert(bitCost < scaleLog); - optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->litLengthSum += optPtr->litLengthFreq[ll]; - } } - - { unsigned ml; - FSE_CState_t mlstate; - FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); - optPtr->matchLengthSum = 0; - for (ml=0; ml<=MaxML; ml++) { - U32 const scaleLog = 10; - U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); - assert(bitCost < scaleLog); - optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; - } } - - { unsigned of; - FSE_CState_t ofstate; - FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); - optPtr->offCodeSum = 0; - for (of=0; of<=MaxOff; of++) { - U32 const scaleLog = 10; - U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); - assert(bitCost < scaleLog); - optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; - optPtr->offCodeSum += optPtr->offCodeFreq[of]; - } } - - } else { /* not a dictionary */ - - assert(optPtr->litFreq != NULL); - if (compressedLiterals) { - unsigned lit = MaxLit; - HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - } - - { unsigned ll; - for (ll=0; ll<=MaxLL; ll++) - optPtr->litLengthFreq[ll] = 1; - } - optPtr->litLengthSum = MaxLL+1; - - { unsigned ml; - for (ml=0; ml<=MaxML; ml++) - optPtr->matchLengthFreq[ml] = 1; - } - optPtr->matchLengthSum = MaxML+1; - - { unsigned of; - for (of=0; of<=MaxOff; of++) - optPtr->offCodeFreq[of] = 1; - } - optPtr->offCodeSum = MaxOff+1; - - } - - } else { /* new block : re-use previous statistics, scaled down */ - - if (compressedLiterals) - optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); - optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); - } - - ZSTD_setBasePrices(optPtr, optLevel); -} - -/* ZSTD_rawLiteralsCost() : - * price of literals (only) in specified segment (which length can be 0). - * does not include price of literalLength symbol */ -static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, - const optState_t* const optPtr, - int optLevel) -{ - if (litLength == 0) return 0; - - if (!ZSTD_compressedLiterals(optPtr)) - return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ - - if (optPtr->priceType == zop_predef) - return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ - - /* dynamic statistics */ - { U32 price = litLength * optPtr->litSumBasePrice; - U32 u; - for (u=0; u < litLength; u++) { - assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ - price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); - } - return price; - } -} - -/* ZSTD_litLengthPrice() : - * cost of literalLength symbol */ -static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) -{ - if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); - - /* dynamic statistics */ - { U32 const llCode = ZSTD_LLcode(litLength); - return (ZSTDInternalConstants::LL_bits[llCode] * BITCOST_MULTIPLIER) - + optPtr->litLengthSumBasePrice - - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); - } -} - -/* ZSTD_getMatchPrice() : - * Provides the cost of the match part (offset + matchLength) of a sequence - * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. - * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ -FORCE_INLINE_TEMPLATE U32 -ZSTD_getMatchPrice(U32 const offset, - U32 const matchLength, - const optState_t* const optPtr, - int const optLevel) -{ - U32 price; - U32 const offCode = ZSTD_highbit32(offset+1); - U32 const mlBase = matchLength - MINMATCH; - assert(matchLength >= MINMATCH); - - if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ - return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); - - /* dynamic statistics */ - price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); - if ((optLevel<2) /*static*/ && offCode >= 20) - price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ - - /* match Length */ - { U32 const mlCode = ZSTD_MLcode(mlBase); - price += (ZSTDInternalConstants::ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); - } - - price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ - - DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); - return price; -} - -/* ZSTD_updateStats() : - * assumption : literals + litLengtn <= iend */ -static void ZSTD_updateStats(optState_t* const optPtr, - U32 litLength, const BYTE* literals, - U32 offsetCode, U32 matchLength) -{ - /* literals */ - if (ZSTD_compressedLiterals(optPtr)) { - U32 u; - for (u=0; u < litLength; u++) - optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; - } - - /* literal Length */ - { U32 const llCode = ZSTD_LLcode(litLength); - optPtr->litLengthFreq[llCode]++; - optPtr->litLengthSum++; - } - - /* match offset code (0-2=>repCode; 3+=>offset+2) */ - { U32 const offCode = ZSTD_highbit32(offsetCode+1); - assert(offCode <= MaxOff); - optPtr->offCodeFreq[offCode]++; - optPtr->offCodeSum++; - } - - /* match Length */ - { U32 const mlBase = matchLength - MINMATCH; - U32 const mlCode = ZSTD_MLcode(mlBase); - optPtr->matchLengthFreq[mlCode]++; - optPtr->matchLengthSum++; - } -} - - -/* ZSTD_readMINMATCH() : - * function safe only for comparisons - * assumption : memPtr must be at least 4 bytes before end of buffer */ -MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) -{ - switch (length) - { - default : - case 4 : return MEM_read32(memPtr); - case 3 : if (MEM_isLittleEndian()) - return MEM_read32(memPtr)<<8; - else - return MEM_read32(memPtr)>>8; - } -} - - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* const ip) -{ - U32* const hashTable3 = ms->hashTable3; - U32 const hashLog3 = ms->hashLog3; - const BYTE* const base = ms->window.base; - U32 idx = *nextToUpdate3; - U32 const target = (U32)(ip - base); - size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); - assert(hashLog3 > 0); - - while(idx < target) { - hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; - idx++; - } - - *nextToUpdate3 = target; - return hashTable3[hash3]; -} - - -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. - * ip : assumed <= iend-8 . - * @return : nb of positions added */ -static U32 ZSTD_insertBt1( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - U32 const mls, const int extDict) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32* const hashTable = ms->hashTable; - U32 const hashLog = cParams->hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const base = ms->window.base; - const BYTE* const dictBase = ms->window.dictBase; - const U32 dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - const BYTE* match; - const U32 current = (U32)(ip-base); - const U32 btLow = btMask >= current ? 0 : current - btMask; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; - U32 matchEndIdx = current+8+1; - size_t bestLength = 8; - U32 nbCompares = 1U << cParams->searchLog; -#ifdef ZSTD_C_PREDICT - U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); - U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1); - predictedSmall += (predictedSmall>0); - predictedLarge += (predictedLarge>0); -#endif /* ZSTD_C_PREDICT */ - - DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); - - assert(ip <= iend-8); /* required for h calculation */ - hashTable[h] = current; /* Update Hash Table */ - - assert(windowLow > 0); - while (nbCompares-- && (matchIndex >= windowLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(matchIndex < current); - -#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ - const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ - if (matchIndex == predictedSmall) { - /* no need to check length, result known */ - *smallerPtr = matchIndex; - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - predictedSmall = predictPtr[1] + (predictPtr[1]>0); - continue; - } - if (matchIndex == predictedLarge) { - *largerPtr = matchIndex; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - predictedLarge = predictPtr[0] + (predictPtr[0]>0); - continue; - } -#endif - - if (!extDict || (matchIndex+matchLength >= dictLimit)) { - assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ - match = base + matchIndex; - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } - - if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ - } - - if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ - /* match is smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ - } else { - /* match is larger than current */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - { U32 positions = 0; - if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - assert(matchEndIdx > current + 8); - return MAX(positions, matchEndIdx - (current + 8)); - } -} - -FORCE_INLINE_TEMPLATE -void ZSTD_updateTree_internal( - ZSTD_matchState_t* ms, - const BYTE* const ip, const BYTE* const iend, - const U32 mls, const ZSTD_dictMode_e dictMode) -{ - const BYTE* const base = ms->window.base; - U32 const target = (U32)(ip - base); - U32 idx = ms->nextToUpdate; - DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", - idx, target, dictMode); - - while(idx < target) { - U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); - assert(idx < (U32)(idx + forward)); - idx += forward; - } - assert((size_t)(ip - base) <= (size_t)(U32)(-1)); - assert((size_t)(iend - base) <= (size_t)(U32)(-1)); - ms->nextToUpdate = target; -} - -void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { - ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); -} - -FORCE_INLINE_TEMPLATE -U32 ZSTD_insertBtAndGetAllMatches ( - ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ - const U32 lengthToBeat, - U32 const mls /* template */) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - const BYTE* const base = ms->window.base; - U32 const current = (U32)(ip-base); - U32 const hashLog = cParams->hashLog; - U32 const minMatch = (mls==3) ? 3 : 4; - U32* const hashTable = ms->hashTable; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 matchIndex = hashTable[h]; - U32* const bt = ms->chainTable; - U32 const btLog = cParams->chainLog - 1; - U32 const btMask= (1U << btLog) - 1; - size_t commonLengthSmaller=0, commonLengthLarger=0; - const BYTE* const dictBase = ms->window.dictBase; - U32 const dictLimit = ms->window.dictLimit; - const BYTE* const dictEnd = dictBase + dictLimit; - const BYTE* const prefixStart = base + dictLimit; - U32 const btLow = (btMask >= current) ? 0 : current - btMask; - U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); - U32 const matchLow = windowLow ? windowLow : 1; - U32* smallerPtr = bt + 2*(current&btMask); - U32* largerPtr = bt + 2*(current&btMask) + 1; - U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */ - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - U32 nbCompares = 1U << cParams->searchLog; - - const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; - const ZSTD_compressionParameters* const dmsCParams = - dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; - const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; - const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; - U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; - U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; - U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; - U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; - U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; - U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; - U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; - - size_t bestLength = lengthToBeat-1; - DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); - - /* check repCode */ - assert(ll0 <= 1); /* necessarily 1 or 0 */ - { U32 const lastR = ZSTD_REP_NUM + ll0; - U32 repCode; - for (repCode = ll0; repCode < lastR; repCode++) { - U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - U32 const repIndex = current - repOffset; - U32 repLen = 0; - assert(current >= dictLimit); - if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < current-dictLimit) { /* equivalent to `current > repIndex >= dictLimit` */ - /* We must validate the repcode offset because when we're using a dictionary the - * valid offset range shrinks when the dictionary goes out of bounds. - */ - if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { - repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; - } - } else { /* repIndex < dictLimit || repIndex >= current */ - const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? - dmsBase + repIndex - dmsIndexDelta : - dictBase + repIndex; - assert(current >= windowLow); - if ( dictMode == ZSTD_extDict - && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */ - & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; - } - if (dictMode == ZSTD_dictMatchState - && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */ - & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { - repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; - } } - /* save longer solution */ - if (repLen > bestLength) { - DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", - repCode, ll0, repOffset, repLen); - bestLength = repLen; - matches[mnum].off = repCode - ll0; - matches[mnum].len = (U32)repLen; - mnum++; - if ( (repLen > sufficient_len) - | (ip+repLen == iLimit) ) { /* best possible */ - return mnum; - } } } } - - /* HC3 match finder */ - if ((mls == 3) /*static*/ && (bestLength < mls)) { - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); - if ((matchIndex3 >= matchLow) - & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { - size_t mlen; - if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { - const BYTE* const match = base + matchIndex3; - mlen = ZSTD_count(ip, match, iLimit); - } else { - const BYTE* const match = dictBase + matchIndex3; - mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); - } - - /* save best solution */ - if (mlen >= mls /* == 3 > bestLength */) { - DEBUGLOG(8, "found small match with hlog3, of length %u", - (U32)mlen); - bestLength = mlen; - assert(current > matchIndex3); - assert(mnum==0); /* no prior solution */ - matches[0].off = (current - matchIndex3) + ZSTD_REP_MOVE; - matches[0].len = (U32)mlen; - mnum = 1; - if ( (mlen > sufficient_len) | - (ip+mlen == iLimit) ) { /* best possible length */ - ms->nextToUpdate = current+1; /* skip insertion */ - return 1; - } } } - /* no dictMatchState lookup: dicts don't have a populated HC3 table */ - } - - hashTable[h] = current; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex >= matchLow)) { - U32* const nextPtr = bt + 2*(matchIndex & btMask); - const BYTE* match; - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - assert(current > matchIndex); - - if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { - assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ - match = base + matchIndex; - if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ - matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); - } else { - match = dictBase + matchIndex; - assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* prepare for match[matchLength] read */ - } - - if (matchLength > bestLength) { - DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); - assert(matchEndIdx > matchIndex); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) - | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { - if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ - break; /* drop, to preserve bt consistency (miss a little bit of compression) */ - } - } - - if (match[matchLength] < ip[matchLength]) { - /* match smaller than current */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ - matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ - } else { - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } } - - *smallerPtr = *largerPtr = 0; - - if (dictMode == ZSTD_dictMatchState && nbCompares) { - size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); - U32 dictMatchIndex = dms->hashTable[dmsH]; - const U32* const dmsBt = dms->chainTable; - commonLengthSmaller = commonLengthLarger = 0; - while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { - const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE* match = dmsBase + dictMatchIndex; - matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); - if (dictMatchIndex+matchLength >= dmsHighLimit) - match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ - - if (matchLength > bestLength) { - matchIndex = dictMatchIndex + dmsIndexDelta; - DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", - (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE); - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE; - matches[mnum].len = (U32)matchLength; - mnum++; - if ( (matchLength > ZSTD_OPT_NUM) - | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - } - - if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ - if (match[matchLength] < ip[matchLength]) { - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ - } else { - /* match is larger than current */ - commonLengthLarger = matchLength; - dictMatchIndex = nextPtr[0]; - } - } - } - - assert(matchEndIdx > current+8); - ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ - return mnum; -} - - -FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( - ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ - ZSTD_matchState_t* ms, - U32* nextToUpdate3, - const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - const U32 rep[ZSTD_REP_NUM], - U32 const ll0, - U32 const lengthToBeat) -{ - const ZSTD_compressionParameters* const cParams = &ms->cParams; - U32 const matchLengthSearch = cParams->minMatch; - DEBUGLOG(8, "ZSTD_BtGetAllMatches"); - if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ - ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); - switch(matchLengthSearch) - { - case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); - default : - case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); - case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); - } -} - - -/*-******************************* -* Optimal parser -*********************************/ - - -static U32 ZSTD_totalLen(ZSTD_optimal_t sol) -{ - return sol.litlen + sol.mlen; -} - -#if 0 /* debug */ - -static void -listStats(const U32* table, int lastEltID) -{ - int const nbElts = lastEltID + 1; - int enb; - for (enb=0; enb < nbElts; enb++) { - (void)table; - /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ - RAWLOG(2, "%4i,", table[enb]); - } - RAWLOG(2, " \n"); -} - -#endif - -FORCE_INLINE_TEMPLATE size_t -ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize, - const int optLevel, - const ZSTD_dictMode_e dictMode) -{ - optState_t* const optStatePtr = &ms->opt; - const BYTE* const istart = (const BYTE*)src; - const BYTE* ip = istart; - const BYTE* anchor = istart; - const BYTE* const iend = istart + srcSize; - const BYTE* const ilimit = iend - 8; - const BYTE* const base = ms->window.base; - const BYTE* const prefixStart = base + ms->window.dictLimit; - const ZSTD_compressionParameters* const cParams = &ms->cParams; - - U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); - U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; - U32 nextToUpdate3 = ms->nextToUpdate; - - ZSTD_optimal_t* const opt = optStatePtr->priceTable; - ZSTD_match_t* const matches = optStatePtr->matchTable; - ZSTD_optimal_t lastSequence; - - /* init */ - DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", - (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); - assert(optLevel <= 2); - ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); - ip += (ip==prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, last_pos = 0; - - /* find first match */ - { U32 const litlen = (U32)(ip - anchor); - U32 const ll0 = !litlen; - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); - if (!nbMatches) { ip++; continue; } - - /* initialize opt[0] */ - { U32 i ; for (i=0; i immediate encoding */ - { U32 const maxML = matches[nbMatches-1].len; - U32 const maxOffset = matches[nbMatches-1].off; - DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", - nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); - - if (maxML > sufficient_len) { - lastSequence.litlen = litlen; - lastSequence.mlen = maxML; - lastSequence.off = maxOffset; - DEBUGLOG(6, "large match (%u>%u), immediate encoding", - maxML, sufficient_len); - cur = 0; - last_pos = ZSTD_totalLen(lastSequence); - goto _shortestPath; - } } - - /* set prices for first matches starting position == 0 */ - { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 pos; - U32 matchNb; - for (pos = 1; pos < minMatch; pos++) { - opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ - } - for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; - U32 const end = matches[matchNb].len; - for ( ; pos <= end ; pos++ ) { - U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); - U32 const sequencePrice = literalsPrice + matchPrice; - DEBUGLOG(7, "rPos:%u => set initial price : %.2f", - pos, ZSTD_fCost(sequencePrice)); - opt[pos].mlen = pos; - opt[pos].off = offset; - opt[pos].litlen = litlen; - opt[pos].price = sequencePrice; - } } - last_pos = pos-1; - } - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - const BYTE* const inr = ip + cur; - assert(cur < ZSTD_OPT_NUM); - DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) - - /* Fix current position with one literal if cheaper */ - { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; - int const price = opt[cur-1].price - + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) - + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) - - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); - assert(price < 1000000000); /* overflow check */ - if (price <= opt[cur].price) { - DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, - opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); - opt[cur].mlen = 0; - opt[cur].off = 0; - opt[cur].litlen = litlen; - opt[cur].price = price; - } else { - DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", - inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), - opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); - } - } - - /* Set the repcodes of the current position. We must do it here - * because we rely on the repcodes of the 2nd to last sequence being - * correct to set the next chunks repcodes during the backward - * traversal. - */ - ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); - assert(cur >= opt[cur].mlen); - if (opt[cur].mlen != 0) { - U32 const prev = cur - opt[cur].mlen; - repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); - memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); - } else { - memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); - } - - /* last match must start at a minimum distance of 8 from oend */ - if (inr > ilimit) continue; - - if (cur == last_pos) break; - - if ( (optLevel==0) /*static_test*/ - && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { - DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); - continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ - } - - { U32 const ll0 = (opt[cur].mlen != 0); - U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; - U32 const previousPrice = opt[cur].price; - U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); - U32 matchNb; - if (!nbMatches) { - DEBUGLOG(7, "rPos:%u : no match found", cur); - continue; - } - - { U32 const maxML = matches[nbMatches-1].len; - DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", - inr-istart, cur, nbMatches, maxML); - - if ( (maxML > sufficient_len) - || (cur + maxML >= ZSTD_OPT_NUM) ) { - lastSequence.mlen = maxML; - lastSequence.off = matches[nbMatches-1].off; - lastSequence.litlen = litlen; - cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ - last_pos = cur + ZSTD_totalLen(lastSequence); - if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ - goto _shortestPath; - } } - - /* set prices using matches found at position == cur */ - for (matchNb = 0; matchNb < nbMatches; matchNb++) { - U32 const offset = matches[matchNb].off; - U32 const lastML = matches[matchNb].len; - U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; - U32 mlen; - - DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", - matchNb, matches[matchNb].off, lastML, litlen); - - for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ - U32 const pos = cur + mlen; - int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); - - if ((pos > last_pos) || (price < opt[pos].price)) { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ - opt[pos].mlen = mlen; - opt[pos].off = offset; - opt[pos].litlen = litlen; - opt[pos].price = price; - } else { - DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", - pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); - if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ - } - } } } - } /* for (cur = 1; cur <= last_pos; cur++) */ - - lastSequence = opt[last_pos]; - cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ - assert(cur < ZSTD_OPT_NUM); /* control overflow*/ - -_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ - assert(opt[0].mlen == 0); - - /* Set the next chunk's repcodes based on the repcodes of the beginning - * of the last match, and the last sequence. This avoids us having to - * update them while traversing the sequences. - */ - if (lastSequence.mlen != 0) { - repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); - memcpy(rep, &reps, sizeof(reps)); - } else { - memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); - } - - { U32 const storeEnd = cur + 1; - U32 storeStart = storeEnd; - U32 seqPos = cur; - - DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", - last_pos, cur); (void)last_pos; - assert(storeEnd < ZSTD_OPT_NUM); - DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); - opt[storeEnd] = lastSequence; - while (seqPos > 0) { - U32 const backDist = ZSTD_totalLen(opt[seqPos]); - storeStart--; - DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", - seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); - opt[storeStart] = opt[seqPos]; - seqPos = (seqPos > backDist) ? seqPos - backDist : 0; - } - - /* save sequences */ - DEBUGLOG(6, "sending selected sequences into seqStore") - { U32 storePos; - for (storePos=storeStart; storePos <= storeEnd; storePos++) { - U32 const llen = opt[storePos].litlen; - U32 const mlen = opt[storePos].mlen; - U32 const offCode = opt[storePos].off; - U32 const advance = llen + mlen; - DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", - anchor - istart, (unsigned)llen, (unsigned)mlen); - - if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ - assert(storePos == storeEnd); /* must be last sequence */ - ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ - continue; /* will finish */ - } - - assert(anchor + llen <= iend); - ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); - ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); - anchor += advance; - ip = anchor; - } } - ZSTD_setBasePrices(optStatePtr, optLevel); - } - } /* while (ip < ilimit) */ - - /* Return the last literals size */ - return (size_t)(iend - anchor); -} - - -size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock_btopt"); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); -} - - -/* used in 2-pass strategy */ -static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) -{ - U32 s, sum=0; - assert(ZSTD_FREQ_DIV+bonus >= 0); - for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); - optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); - optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); - optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); -} - -/* ZSTD_initStats_ultra(): - * make a first compression pass, just to seed stats with more accurate starting values. - * only works on first block, with no dictionary and no ldm. - * this function cannot error, hence its contract must be respected. - */ -static void -ZSTD_initStats_ultra(ZSTD_matchState_t* ms, - seqStore_t* seqStore, - U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ - memcpy(tmpRep, rep, sizeof(tmpRep)); - - DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); - assert(ms->opt.litLengthSum == 0); /* first block */ - assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ - assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ - assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ - - ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ - - /* invalidate first scan from history */ - ZSTD_resetSeqStore(seqStore); - ms->window.base -= srcSize; - ms->window.dictLimit += (U32)srcSize; - ms->window.lowLimit = ms->window.dictLimit; - ms->nextToUpdate = ms->window.dictLimit; - - /* re-inforce weight of collected statistics */ - ZSTD_upscaleStats(&ms->opt); -} - -size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_btultra2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - U32 const current = (U32)((const BYTE*)src - ms->window.base); - DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); - - /* 2-pass strategy: - * this strategy makes a first pass over first block to collect statistics - * and seed next round's statistics with it. - * After 1st pass, function forgets everything, and starts a new block. - * Consequently, this can only work if no data has been previously loaded in tables, - * aka, no dictionary, no prefix, no ldm preprocessing. - * The compression ratio gain is generally small (~0.5% on first block), - * the cost is 2x cpu time on first block. */ - assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - if ( (ms->opt.litLengthSum==0) /* first block */ - && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ - && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ - && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ - && (srcSize > ZSTD_PREDEF_THRESHOLD) - ) { - ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); - } - - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); -} - -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); -} - -size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); -} - -size_t ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - const void* src, size_t srcSize) -{ - return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); -} - -/* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ - -} diff --git a/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp b/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp deleted file mode 100644 index 3c9a8cdff..000000000 --- a/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +++ /dev/null @@ -1,1251 +0,0 @@ -/* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* ************************************************************** -* Dependencies -****************************************************************/ -#include /* memcpy, memset */ -#include "zstd/common/compiler.h" -#include "zstd/common/bitstream.h" /* BIT_* */ -#include "zstd/common/fse.h" /* to compress headers */ -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/common/error_private.h" - -namespace duckdb_zstd { -/* ************************************************************** -* Macros -****************************************************************/ - -/* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. - */ -#if defined(HUF_FORCE_DECOMPRESS_X1) && \ - defined(HUF_FORCE_DECOMPRESS_X2) -#error "Cannot force the use of the X1 and X2 decoders at the same time!" -#endif - - -/* ************************************************************** -* Error Management -****************************************************************/ -// #define HUF_isError ERR_isError - - -/* ************************************************************** -* Byte alignment for workSpace management -****************************************************************/ -#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) -#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) - - -/* ************************************************************** -* BMI2 Variant Wrappers -****************************************************************/ -#if DYNAMIC_BMI2 - -#define HUF_DGEN(fn) \ - \ - static size_t fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -#else - -#define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } - -#endif - - -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ -typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; - -static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) -{ - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; -} - - -#ifndef HUF_FORCE_DECOMPRESS_X2 - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ - -size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) -{ - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void* const dtPtr = DTable + 1; - HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; - - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; - - /* Table header */ - { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); - } - - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; n> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. */ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } - return iSize; -} - -size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX1_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - -FORCE_INLINE_TEMPLATE BYTE -HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; -} - -#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) - -#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) - -HINT_INLINE size_t -HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - } - - /* [0-3] symbols remaining */ - if (MEM_32bits()) - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - - /* no more data to retrieve from bitstream, no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - - return pEnd-pStart; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + dstSize; - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); - - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - - return dstSize; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - 3; - const void* const dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - U32 endSignal = 1; - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - - /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; - } - - /* check corruption */ - /* note : should not be necessary : op# advance in lock step, and we control op4. - * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } - - /* decoded size */ - return dstSize; - } -} - - -typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, - const void *cSrc, - size_t cSrcSize, - const HUF_DTable *DTable); - -HUF_DGEN(HUF_decompress1X1_usingDTable_internal) -HUF_DGEN(HUF_decompress4X1_usingDTable_internal) - - - -size_t HUF_decompress1X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} - - -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} - -size_t HUF_decompress4X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} - -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); -} - - -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -#endif /* HUF_FORCE_DECOMPRESS_X2 */ - - -#ifndef HUF_FORCE_DECOMPRESS_X1 - -/* *************************/ -/* double-symbols decoding */ -/* *************************/ - -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; - - -/* HUF_fillDTableX2Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq) -{ - HUF_DEltX2 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; - - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill skipped values */ - if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } - - /* fill DTable */ - { U32 s; for (s=0; s= 1 */ - - rankVal[weight] += length; - } } -} - - -static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, - const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol); - } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; - } -} - -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; - U32 *rankStart; - - rankValCol_t* rankVal; - U32* rankStats; - U32* rankStart0; - sortedSymbol_t* sortedSymbol; - BYTE* weightList; - size_t spaceUsed32 = 0; - - rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); - spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); - - DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; - - /* check result */ - if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ - - /* Get start index of each weight */ - { U32 w, nextRankStart = 0; - for (w=1; w> consumed; - } } } } - - HUF_fillDTableX2(dt, maxTableLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); - - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; -} - -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} - - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} - -FORCE_INLINE_TEMPLATE U32 -HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } - return 1; -} - -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) - -HINT_INLINE size_t -HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, - const HUF_DEltX2* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } - - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - - if (p < pEnd) - p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); - - return p-pStart; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BIT_DStream_t bitD; - - /* Init */ - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - /* decode */ - { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); - } - - /* check */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; -} - -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - (sizeof(size_t)-1); - const void* const dtPtr = DTable+1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal = 1; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; -#else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = (U32)LIKELY( - (U32)(BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (U32)(BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (U32)(BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (U32)(BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); -#endif - } - - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } - - /* decoded size */ - return dstSize; - } -} - -HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) - -size_t HUF_decompress1X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} - - -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} - -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} - -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); -} - - -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} - -#endif /* HUF_FORCE_DECOMPRESS_X1 */ - - -/* ***********************************/ -/* Universal decompression selectors */ -/* ***********************************/ - -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} - -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} - - -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) -typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = -{ - /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ -}; -#endif - -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) -{ - assert(dstSize > 0); - assert(dstSize <= 128*1024); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dstSize; - (void)cSrcSize; - return 0; -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dstSize; - (void)cSrcSize; - return 1; -#else - /* decoder timing evaluation */ - { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ - return DTime1 < DTime0; - } -#endif -} - - -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; -#endif - - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif - } -} - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } -} - -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - - -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, - size_t dstSize, const void* cSrc, - size_t cSrcSize, void* workSpace, - size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#else - return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#endif - } -} - -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#else - return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#endif - } -} - -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} - - -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} - -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; - - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} -#endif - -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} - -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#else - return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#endif - } -} - -} diff --git a/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp b/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp deleted file mode 100644 index ecb71145f..000000000 --- a/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* zstd_ddict.c : - * concentrates all logic that needs to know the internals of ZSTD_DDict object */ - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ -#include "zstd/common/mem.h" /* low level memory routines */ -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/decompress/zstd_decompress_internal.h" -#include "zstd/decompress/zstd_ddict.h" - -// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -// # include "../legacy/zstd_legacy.h" -// #endif - -namespace duckdb_zstd { - -/*-******************************************************* -* Types -*********************************************************/ -struct ZSTD_DDict_s { - void* dictBuffer; - const void* dictContent; - size_t dictSize; - ZSTD_entropyDTables_t entropy; - U32 dictID; - U32 entropyPresent; - ZSTD_customMem cMem; -}; /* typedef'd to ZSTD_DDict within "zstd.h" */ - -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) -{ - assert(ddict != NULL); - return ddict->dictContent; -} - -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) -{ - assert(ddict != NULL); - return ddict->dictSize; -} - -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - DEBUGLOG(4, "ZSTD_copyDDictParameters"); - assert(dctx != NULL); - assert(ddict != NULL); - dctx->dictID = ddict->dictID; - dctx->prefixStart = ddict->dictContent; - dctx->virtualStart = ddict->dictContent; - dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; - dctx->previousDstEnd = dctx->dictEnd; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentBeginForFuzzing = dctx->prefixStart; - dctx->dictContentEndForFuzzing = dctx->previousDstEnd; -#endif - if (ddict->entropyPresent) { - dctx->litEntropy = 1; - dctx->fseEntropy = 1; - dctx->LLTptr = ddict->entropy.LLTable; - dctx->MLTptr = ddict->entropy.MLTable; - dctx->OFTptr = ddict->entropy.OFTable; - dctx->HUFptr = ddict->entropy.hufTable; - dctx->entropy.rep[0] = ddict->entropy.rep[0]; - dctx->entropy.rep[1] = ddict->entropy.rep[1]; - dctx->entropy.rep[2] = ddict->entropy.rep[2]; - } else { - dctx->litEntropy = 0; - dctx->fseEntropy = 0; - } -} - - -static size_t -ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, - ZSTD_dictContentType_e dictContentType) -{ - ddict->dictID = 0; - ddict->entropyPresent = 0; - if (dictContentType == ZSTD_dct_rawContent) return 0; - - if (ddict->dictSize < 8) { - if (dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ - return 0; /* pure content mode */ - } - { U32 const magic = MEM_readLE32(ddict->dictContent); - if (magic != ZSTD_MAGIC_DICTIONARY) { - if (dictContentType == ZSTD_dct_fullDict) - return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ - return 0; /* pure content mode */ - } - } - ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); - - /* load entropy tables */ - RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( - &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted, ""); - ddict->entropyPresent = 1; - return 0; -} - - -static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { - ddict->dictBuffer = NULL; - ddict->dictContent = dict; - if (!dict) dictSize = 0; - } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); - ddict->dictBuffer = internalBuffer; - ddict->dictContent = internalBuffer; - if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); - } - ddict->dictSize = dictSize; - ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - - /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); - - return 0; -} - -ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem) -{ - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - if (ddict == NULL) return NULL; - ddict->cMem = customMem; - { size_t const initResult = ZSTD_initDDict_internal(ddict, - dict, dictSize, - dictLoadMethod, dictContentType); - if (ZSTD_isError(initResult)) { - ZSTD_freeDDict(ddict); - return NULL; - } } - return ddict; - } -} - -/*! ZSTD_createDDict() : -* Create a digested dictionary, to start decompression without startup delay. -* `dict` content is copied inside DDict. -* Consequently, `dict` can be released after `ZSTD_DDict` creation */ -ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); -} - -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, to start decompression without startup delay. - * Dictionary content is simply referenced, it will be accessed during decompression. - * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ -ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) -{ - ZSTD_customMem const allocator = { NULL, NULL, NULL }; - return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); -} - - -const ZSTD_DDict* ZSTD_initStaticDDict( - void* sBuffer, size_t sBufferSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - size_t const neededSpace = sizeof(ZSTD_DDict) - + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); - ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; - assert(sBuffer != NULL); - assert(dict != NULL); - if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ - if (sBufferSize < neededSpace) return NULL; - if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ - dict = ddict+1; - } - if (ZSTD_isError( ZSTD_initDDict_internal(ddict, - dict, dictSize, - ZSTD_dlm_byRef, dictContentType) )) - return NULL; - return ddict; -} - - -size_t ZSTD_freeDDict(ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support free on NULL */ - { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); - return 0; - } -} - -/*! ZSTD_estimateDDictSize() : - * Estimate amount of memory that will be needed to create a dictionary for decompression. - * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ -size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) -{ - return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); -} - -size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; /* support sizeof on NULL */ - return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; -} - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) -{ - if (ddict==NULL) return 0; - return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); -} - -} diff --git a/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp b/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp deleted file mode 100644 index 19ebd9078..000000000 --- a/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +++ /dev/null @@ -1,1957 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/* *************************************************************** -* Tuning parameters -*****************************************************************/ -/*! - * HEAPMODE : - * Select how default decompression function ZSTD_decompress() allocates its context, - * on stack (0), or into heap (1, default; requires malloc()). - * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. - */ -#ifndef ZSTD_HEAPMODE -# define ZSTD_HEAPMODE 1 -#endif - -/*! -* LEGACY_SUPPORT : -* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) -*/ -#ifndef ZSTD_LEGACY_SUPPORT -# define ZSTD_LEGACY_SUPPORT 0 -#endif - -/*! - * MAXWINDOWSIZE_DEFAULT : - * maximum window size accepted by DStream __by default__. - * Frames requiring more memory will be rejected. - * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). - */ -#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) -#endif - -/*! - * NO_FORWARD_PROGRESS_MAX : - * maximum allowed nb of calls to ZSTD_decompressStream() - * without any forward progress - * (defined as: no byte read from input, and no byte flushed to output) - * before triggering an error. - */ -#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX -# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 -#endif - - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ -#include "zstd/common/mem.h" /* low level memory routines */ -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/common/zstd_internal.h" /* blockProperties_t */ -#include "zstd/decompress/zstd_decompress_internal.h" /* ZSTD_DCtx */ -#include "zstd/decompress/zstd_ddict.h" /* ZSTD_DDictDictContent */ -#include "zstd/decompress/zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ - -// #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -// # include "../legacy/zstd_legacy.h" -// #endif -namespace duckdb_zstd { -const U32 ZSTDConstants::LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; - -const U32 ZSTDConstants::OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; - -const U32 ZSTDConstants::OF_bits[MaxOff+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 }; - -const U32 ZSTDConstants::ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, - 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; - -const size_t ZSTDInternalConstants::ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -const U32 ZSTDInternalConstants::LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 6, 7, 8, 9,10,11,12, - 13,14,15,16 }; -const S16 ZSTDInternalConstants::LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, - 2, 3, 2, 1, 1, 1, 1, 1, - -1,-1,-1,-1 }; -#define LL_DEFAULTNORMLOG 6 /* for static allocation */ -const U32 ZSTDInternalConstants::LL_defaultNormLog = LL_DEFAULTNORMLOG; -const U32 ZSTDInternalConstants::ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 2, 2, 3, 3, - 4, 4, 5, 7, 8, 9,10,11, - 12,13,14,15,16 }; -const S16 ZSTDInternalConstants::ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1,-1,-1, - -1,-1,-1,-1,-1 }; -#define ML_DEFAULTNORMLOG 6 /* for static allocation */ -const U32 ZSTDInternalConstants::ML_defaultNormLog = ML_DEFAULTNORMLOG; - -const S16 ZSTDInternalConstants::OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, - 2, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, - -1,-1,-1,-1,-1 }; -#define OF_DEFAULTNORMLOG 5 /* for static allocation */ -const U32 ZSTDInternalConstants::OF_defaultNormLog = OF_DEFAULTNORMLOG; -const U32 ZSTDInternalConstants::repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; - -const ZSTD_customMem ZSTDInternalConstants::ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ - -/*-************************************************************* -* Context management -***************************************************************/ -size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) -{ - if (dctx==NULL) return 0; /* support sizeof NULL */ - return sizeof(*dctx) - + ZSTD_sizeof_DDict(dctx->ddictLocal) - + dctx->inBuffSize + dctx->outBuffSize; -} - -size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } - - -static size_t ZSTD_startingInputLength(ZSTD_format_e format) -{ - size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); - /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ - assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); - return startingInputLength; -} - -static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) -{ - dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ - dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - dctx->ddict = NULL; - dctx->ddictLocal = NULL; - dctx->dictEnd = NULL; - dctx->ddictIsCold = 0; - dctx->dictUses = ZSTD_dont_use; - dctx->inBuff = NULL; - dctx->inBuffSize = 0; - dctx->outBuffSize = 0; - dctx->streamStage = zdss_init; - dctx->legacyContext = NULL; - dctx->previousLegacyVersion = 0; - dctx->noForwardProgress = 0; - dctx->oversizedDuration = 0; - dctx->bmi2 = 0; - dctx->outBufferMode = ZSTD_obm_buffered; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentEndForFuzzing = NULL; -#endif -} - -ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; - - if ((size_t)workspace & 7) return NULL; /* 8-aligned */ - if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ - - ZSTD_initDCtx_internal(dctx); - dctx->staticSize = workspaceSize; - dctx->inBuff = (char*)(dctx+1); - return dctx; -} - -ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; - - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); - if (!dctx) return NULL; - dctx->customMem = customMem; - ZSTD_initDCtx_internal(dctx); - return dctx; - } -} - -ZSTD_DCtx* ZSTD_createDCtx(void) -{ - DEBUGLOG(3, "ZSTD_createDCtx"); - return ZSTD_createDCtx_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -static void ZSTD_clearDict(ZSTD_DCtx* dctx) -{ - ZSTD_freeDDict(dctx->ddictLocal); - dctx->ddictLocal = NULL; - dctx->ddict = NULL; - dctx->dictUses = ZSTD_dont_use; -} - -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) -{ - if (dctx==NULL) return 0; /* support free on NULL */ - RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); - { ZSTD_customMem const cMem = dctx->customMem; - ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); - dctx->inBuff = NULL; -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (dctx->legacyContext) - ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); -#endif - ZSTD_free(dctx, cMem); - return 0; - } -} - -/* no longer useful */ -void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) -{ - size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ -} - - -/*-************************************************************* - * Frame header decoding - ***************************************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -unsigned ZSTD_isFrame(const void* buffer, size_t size) -{ - if (size < ZSTD_FRAMEIDSIZE) return 0; - { U32 const magic = MEM_readLE32(buffer); - if (magic == ZSTD_MAGICNUMBER) return 1; - if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; - } -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(buffer, size)) return 1; -#endif - return 0; -} - -static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; -static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -/** ZSTD_frameHeaderSize_internal() : - * srcSize must be large enough to reach header size fields. - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. - * @return : size of the Frame Header - * or an error code, which can be tested with ZSTD_isError() */ -static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) -{ - size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); - - { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; - U32 const dictID= fhd & 3; - U32 const singleSegment = (fhd >> 5) & 1; - U32 const fcsId = fhd >> 6; - return minInputSize + !singleSegment - + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] - + (singleSegment && !fcsId); - } -} - -/** ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_frameHeaderSize_prefix. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) -{ - return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); -} - - -/** ZSTD_getFrameHeader_advanced() : - * decode Frame Header, or require larger `srcSize`. - * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) -{ - const BYTE* ip = (const BYTE*)src; - size_t const minInputSize = ZSTD_startingInputLength(format); - - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ - if (srcSize < minInputSize) return minInputSize; - RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); - - if ( (format != ZSTD_f_zstd1_magicless) - && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - /* skippable frame */ - if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) - return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); - zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); - zfhPtr->frameType = ZSTD_skippableFrame; - return 0; - } - RETURN_ERROR(prefix_unknown, ""); - } - - /* ensure there is enough `srcSize` to fully read/decode frame header */ - { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); - if (srcSize < fhsize) return fhsize; - zfhPtr->headerSize = (U32)fhsize; - } - - { BYTE const fhdByte = ip[minInputSize-1]; - size_t pos = minInputSize; - U32 const dictIDSizeCode = fhdByte&3; - U32 const checksumFlag = (fhdByte>>2)&1; - U32 const singleSegment = (fhdByte>>5)&1; - U32 const fcsID = fhdByte>>6; - U64 windowSize = 0; - U32 dictID = 0; - U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; - RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, - "reserved bits, must be zero"); - - if (!singleSegment) { - BYTE const wlByte = ip[pos++]; - U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); - windowSize = (1ULL << windowLog); - windowSize += (windowSize >> 3) * (wlByte&7); - } - switch(dictIDSizeCode) - { - default: assert(0); /* impossible */ - case 0 : break; - case 1 : dictID = ip[pos]; pos++; break; - case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; - case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; - } - switch(fcsID) - { - default: assert(0); /* impossible */ - case 0 : if (singleSegment) frameContentSize = ip[pos]; break; - case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; - case 2 : frameContentSize = MEM_readLE32(ip+pos); break; - case 3 : frameContentSize = MEM_readLE64(ip+pos); break; - } - if (singleSegment) windowSize = frameContentSize; - - zfhPtr->frameType = ZSTD_frame; - zfhPtr->frameContentSize = frameContentSize; - zfhPtr->windowSize = windowSize; - zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - zfhPtr->dictID = dictID; - zfhPtr->checksumFlag = checksumFlag; - } - return 0; -} - -/** ZSTD_getFrameHeader() : - * decode Frame Header, or require larger `srcSize`. - * note : this function does not consume input, it only reads it. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) -{ - return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); -} - - -/** ZSTD_getFrameContentSize() : - * compatible with legacy mode - * @return : decompressed size of the single frame pointed to be `src` if known, otherwise - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) -{ -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { - unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); - return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret; - } -#endif - { ZSTD_frameHeader zfh; - if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) - return ZSTD_CONTENTSIZE_ERROR; - if (zfh.frameType == ZSTD_skippableFrame) { - return 0; - } else { - return zfh.frameContentSize; - } } -} - -static size_t readSkippableFrameSize(void const* src, size_t srcSize) -{ - size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; - U32 sizeU32; - - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); - - sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); - RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported, ""); - { - size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); - return skippableSize; - } -} - -/** ZSTD_findDecompressedSize() : - * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames - * @return : decompressed size of the frames contained */ -unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) -{ - unsigned long long totalDstSize = 0; - - while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { - U32 const magicNumber = MEM_readLE32(src); - - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } - - { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; - - /* check for overflow */ - if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; - totalDstSize += ret; - } - { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); - if (ZSTD_isError(frameSrcSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; - } - } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - - if (srcSize) return ZSTD_CONTENTSIZE_ERROR; - - return totalDstSize; -} - -/** ZSTD_getDecompressedSize() : - * compatible with legacy mode - * @return : decompressed size if known, 0 otherwise - note : 0 can mean any of the following : - - frame content is empty - - decompressed size field is not present in frame header - - frame header unknown / not supported - - frame header not complete (`srcSize` too small) */ -unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) -{ - unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); - return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; -} - - -/** ZSTD_decodeFrameHeader() : - * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). - * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) -{ - size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); - if (ZSTD_isError(result)) return result; /* invalid header */ - RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); -#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - /* Skip the dictID check in fuzzing mode, because it makes the search - * harder. - */ - RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong, ""); -#endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); - return 0; -} - -static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) -{ - ZSTD_frameSizeInfo frameSizeInfo; - frameSizeInfo.compressedSize = ret; - frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; - return frameSizeInfo; -} - -static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) -{ - ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) - return ZSTD_findFrameSizeInfoLegacy(src, srcSize); -#endif - - if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) - && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); - assert(ZSTD_isError(frameSizeInfo.compressedSize) || - frameSizeInfo.compressedSize <= srcSize); - return frameSizeInfo; - } else { - const BYTE* ip = (const BYTE*)src; - const BYTE* const ipstart = ip; - size_t remainingSize = srcSize; - size_t nbBlocks = 0; - ZSTD_frameHeader zfh; - - /* Extract Frame Header */ - { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(ret)) - return ZSTD_errorFrameSizeInfo(ret); - if (ret > 0) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - } - - ip += zfh.headerSize; - remainingSize -= zfh.headerSize; - - /* Iterate over each block */ - while (1) { - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return ZSTD_errorFrameSizeInfo(cBlockSize); - - if (ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize > remainingSize) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - - ip += ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; - remainingSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize + cBlockSize; - nbBlocks++; - - if (blockProperties.lastBlock) break; - } - - /* Final frame content checksum */ - if (zfh.checksumFlag) { - if (remainingSize < 4) - return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); - ip += 4; - } - - frameSizeInfo.compressedSize = ip - ipstart; - frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) - ? zfh.frameContentSize - : nbBlocks * zfh.blockSizeMax; - return frameSizeInfo; - } -} - -/** ZSTD_findFrameCompressedSize() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the compressed size of the frame starting at `src` */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) -{ - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - return frameSizeInfo.compressedSize; -} - -/** ZSTD_decompressBound() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame or a skippeable frame - * `srcSize` must be at least as large as the frame contained - * @return : the maximum decompressed size of the compressed source - */ -unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) -{ - unsigned long long bound = 0; - /* Iterate over each frame */ - while (srcSize > 0) { - ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); - size_t const compressedSize = frameSizeInfo.compressedSize; - unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; - if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) - return ZSTD_CONTENTSIZE_ERROR; - assert(srcSize >= compressedSize); - src = (const BYTE*)src + compressedSize; - srcSize -= compressedSize; - bound += decompressedBound; - } - return bound; -} - - -/*-************************************************************* - * Frame decoding - ***************************************************************/ - -/** ZSTD_insertBlock() : - * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) -{ - DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); - dctx->previousDstEnd = (const char*)blockStart + blockSize; - return blockSize; -} - - -static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_copyRawBlock"); - if (dst == NULL) { - if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null, ""); - } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); - memcpy(dst, src, srcSize); - return srcSize; -} - -static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, - BYTE b, - size_t regenSize) -{ - if (dst == NULL) { - if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null, ""); - } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); - memset(dst, b, regenSize); - return regenSize; -} - - -/*! ZSTD_decompressFrame() : - * @dctx must be properly initialized - * will update *srcPtr and *srcSizePtr, - * to make *srcPtr progress by one frame. */ -static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void** srcPtr, size_t *srcSizePtr) -{ - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; - BYTE* op = ostart; - size_t remainingSrcSize = *srcSizePtr; - - DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); - - /* check */ - RETURN_ERROR_IF( - remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTDInternalConstants::ZSTD_blockHeaderSize, - srcSize_wrong, ""); - - /* Frame Header */ - { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( - ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); - if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; - RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTDInternalConstants::ZSTD_blockHeaderSize, - srcSize_wrong, ""); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); - ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; - } - - /* Loop on each block */ - while (1) { - size_t decodedSize; - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - - ip += ZSTDInternalConstants::ZSTD_blockHeaderSize; - remainingSrcSize -= ZSTDInternalConstants::ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); - - switch(blockProperties.blockType) - { - case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); - break; - case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); - break; - case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); - break; - case bt_reserved : - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } - - if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) - XXH64_update(&dctx->xxhState, op, decodedSize); - if (decodedSize != 0) - op += decodedSize; - assert(ip != NULL); - ip += cBlockSize; - remainingSrcSize -= cBlockSize; - if (blockProperties.lastBlock) break; - } - - if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { - RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected, ""); - } - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); - ip += 4; - remainingSrcSize -= 4; - } - - /* Allow caller to get size read */ - *srcPtr = ip; - *srcSizePtr = remainingSrcSize; - return op-ostart; -} - -static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize, - const ZSTD_DDict* ddict) -{ - void* const dststart = dst; - int moreThan1Frame = 0; - - DEBUGLOG(5, "ZSTD_decompressMultiFrame"); - assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ - - if (ddict) { - dict = ZSTD_DDict_dictContent(ddict); - dictSize = ZSTD_DDict_dictSize(ddict); - } - - while (srcSize >= ZSTD_startingInputLength(dctx->format)) { - -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) - if (ZSTD_isLegacy(src, srcSize)) { - size_t decodedSize; - size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); - if (ZSTD_isError(frameSize)) return frameSize; - RETURN_ERROR_IF(dctx->staticSize, memory_allocation, - "legacy support is not compatible with static dctx"); - - decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); - if (ZSTD_isError(decodedSize)) return decodedSize; - - assert(decodedSize <=- dstCapacity); - dst = (BYTE*)dst + decodedSize; - dstCapacity -= decodedSize; - - src = (const BYTE*)src + frameSize; - srcSize -= frameSize; - - continue; - } -#endif - - { U32 const magicNumber = MEM_readLE32(src); - DEBUGLOG(4, "reading magic number %08X (expecting %08X)", - (unsigned)magicNumber, ZSTD_MAGICNUMBER); - if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); - assert(skippableSize <= srcSize); - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } } - - if (ddict) { - /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); - } else { - /* this will initialize correctly with no dict if dict == NULL, so - * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); - } - ZSTD_checkContinuity(dctx, dst); - - { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, - &src, &srcSize); - RETURN_ERROR_IF( - (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) - && (moreThan1Frame==1), - srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. But this is _much_ less likely than a srcSize field " - "error."); - if (ZSTD_isError(res)) return res; - assert(res <= dstCapacity); - if (res != 0) - dst = (BYTE*)dst + res; - dstCapacity -= res; - } - moreThan1Frame = 1; - } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ - - RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); - - return (BYTE*)dst - (BYTE*)dststart; -} - -size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict, size_t dictSize) -{ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); -} - - -static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) -{ - switch (dctx->dictUses) { - default: - assert(0 /* Impossible */); - /* fall-through */ - case ZSTD_dont_use: - ZSTD_clearDict(dctx); - return NULL; - case ZSTD_use_indefinitely: - return dctx->ddict; - case ZSTD_use_once: - dctx->dictUses = ZSTD_dont_use; - return dctx->ddict; - } -} - -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); -} - - -size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ -#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) - size_t regenSize; - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); - regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); - ZSTD_freeDCtx(dctx); - return regenSize; -#else /* stack mode */ - ZSTD_DCtx dctx; - ZSTD_initDCtx_internal(&dctx); - return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); -#endif -} - - -/*-************************************** -* Advanced Streaming Decompression API -* Bufferless and synchronous -****************************************/ -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } - -/** - * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, - * we allow taking a partial block as the input. Currently only raw uncompressed blocks can - * be streamed. - * - * For blocks that can be streamed, this allows us to reduce the latency until we produce - * output, and avoid copying the input. - * - * @param inputSize - The total amount of input that the caller currently has. - */ -static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { - if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) - return dctx->expected; - if (dctx->bType != bt_raw) - return dctx->expected; - return MIN(MAX(inputSize, 1), dctx->expected); -} - -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { - switch(dctx->stage) - { - default: /* should not happen */ - assert(0); - case ZSTDds_getFrameHeaderSize: - case ZSTDds_decodeFrameHeader: - return ZSTDnit_frameHeader; - case ZSTDds_decodeBlockHeader: - return ZSTDnit_blockHeader; - case ZSTDds_decompressBlock: - return ZSTDnit_block; - case ZSTDds_decompressLastBlock: - return ZSTDnit_lastBlock; - case ZSTDds_checkChecksum: - return ZSTDnit_checksum; - case ZSTDds_decodeSkippableHeader: - case ZSTDds_skipFrame: - return ZSTDnit_skippableFrame; - } -} - -static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } - -/** ZSTD_decompressContinue() : - * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) - * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) - * or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); - /* Sanity check */ - RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); - - switch (dctx->stage) - { - case ZSTDds_getFrameHeaderSize : - assert(src != NULL); - if (dctx->format == ZSTD_f_zstd1) { /* allows header */ - assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ - if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } } - dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); - if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); - dctx->expected = dctx->headerSize - srcSize; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; - - case ZSTDds_decodeFrameHeader: - assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - dctx->stage = ZSTDds_decodeBlockHeader; - return 0; - - case ZSTDds_decodeBlockHeader: - { blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTDInternalConstants::ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(cBlockSize)) return cBlockSize; - RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->rleSize = bp.origSize; - if (cBlockSize) { - dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; - return 0; - } - /* empty block */ - if (bp.lastBlock) { - if (dctx->fParams.checksumFlag) { - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* end of frame */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; /* jump to next header */ - dctx->stage = ZSTDds_decodeBlockHeader; - } - return 0; - } - - case ZSTDds_decompressLastBlock: - case ZSTDds_decompressBlock: - DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); - { size_t rSize; - switch(dctx->bType) - { - case bt_compressed: - DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); - rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_raw : - assert(srcSize <= dctx->expected); - rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); - FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); - assert(rSize == srcSize); - dctx->expected -= rSize; - break; - case bt_rle : - rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); - dctx->expected = 0; /* Streaming not supported */ - break; - case bt_reserved : /* should never happen */ - default: - RETURN_ERROR(corruption_detected, "invalid block type"); - } - FORWARD_IF_ERROR(rSize, ""); - RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); - DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); - dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); - dctx->previousDstEnd = (char*)dst + rSize; - - /* Stay on the same stage until we are finished streaming the block. */ - if (dctx->expected > 0) { - return rSize; - } - - if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ - DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); - RETURN_ERROR_IF( - dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected, ""); - if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* ends here */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - } - return rSize; - } - - case ZSTDds_checkChecksum: - assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } - - case ZSTDds_decodeSkippableHeader: - assert(src != NULL); - assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ - dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ - dctx->stage = ZSTDds_skipFrame; - return 0; - - case ZSTDds_skipFrame: - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - - default: - assert(0); /* impossible */ - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ - } -} - - -static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dict; - dctx->previousDstEnd = (const char*)dict + dictSize; -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - dctx->dictContentBeginForFuzzing = dctx->prefixStart; - dctx->dictContentEndForFuzzing = dctx->previousDstEnd; -#endif - return 0; -} - -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of entropy tables read */ -size_t -ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize) -{ - const BYTE* dictPtr = (const BYTE*)dict; - const BYTE* const dictEnd = dictPtr + dictSize; - - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); - assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ - dictPtr += 8; /* skip header = magic + dictID */ - - ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); - ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); - ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); - { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ - size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); -#ifdef HUF_FORCE_DECOMPRESS_X1 - /* in minimal huffman, we always use X1 variants */ - size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, - workspace, workspaceSize); -#else - size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, - workspace, workspaceSize); -#endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); - dictPtr += hSize; - } - - { short offcodeNCount[MaxOff+1]; - unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->OFTable, - offcodeNCount, offcodeMaxValue, - ZSTDConstants::OF_base, ZSTDConstants::OF_bits, - offcodeLog); - dictPtr += offcodeHeaderSize; - } - - { short matchlengthNCount[MaxML+1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->MLTable, - matchlengthNCount, matchlengthMaxValue, - ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, - matchlengthLog); - dictPtr += matchlengthHeaderSize; - } - - { short litlengthNCount[MaxLL+1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); - ZSTD_buildFSETable( entropy->LLTable, - litlengthNCount, litlengthMaxValue, - ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, - litlengthLog); - dictPtr += litlengthHeaderSize; - } - - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); - { int i; - size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); - for (i=0; i<3; i++) { - U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; - RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted, ""); - entropy->rep[i] = rep; - } } - - return dictPtr - (const BYTE*)dict; -} - -static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); - { U32 const magic = MEM_readLE32(dict); - if (magic != ZSTD_MAGIC_DICTIONARY) { - return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ - } } - dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); - - /* load entropy tables */ - { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); - dict = (const char*)dict + eSize; - dictSize -= eSize; - } - dctx->litEntropy = dctx->fseEntropy = 1; - - /* reference dictionary content */ - return ZSTD_refDictContent(dctx, dict, dictSize); -} - -static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; - -size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) -{ - assert(dctx != NULL); - dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->decodedSize = 0; - dctx->previousDstEnd = NULL; - dctx->prefixStart = NULL; - dctx->virtualStart = NULL; - dctx->dictEnd = NULL; - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - dctx->bType = bt_reserved; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; - dctx->MLTptr = dctx->entropy.MLTable; - dctx->OFTptr = dctx->entropy.OFTable; - dctx->HUFptr = dctx->entropy.hufTable; - return 0; -} - -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); - if (dict && dictSize) - RETURN_ERROR_IF( - ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted, ""); - return 0; -} - - -/* ====== ZSTD_DDict ====== */ - -size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); - assert(dctx != NULL); - if (ddict) { - const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); - size_t const dictSize = ZSTD_DDict_dictSize(ddict); - const void* const dictEnd = dictStart + dictSize; - dctx->ddictIsCold = (dctx->dictEnd != dictEnd); - DEBUGLOG(4, "DDict is %s", - dctx->ddictIsCold ? "~cold~" : "hot!"); - } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); - if (ddict) { /* NULL ddict is equivalent to no dictionary */ - ZSTD_copyDDictParameters(dctx, ddict); - } - return 0; -} - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) -{ - if (dictSize < 8) return 0; - if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; - return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); -} - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompress frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary (most common case). - * - The frame was built with dictID intentionally removed. - * Needed dictionary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, frame header could not be decoded. - * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use - * ZSTD_getFrameHeader(), which will provide a more precise error code. */ -unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) -{ - ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; - size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); - if (ZSTD_isError(hError)) return 0; - return zfp.dictID; -} - - -/*! ZSTD_decompress_usingDDict() : -* Decompression using a pre-digested Dictionary -* Use dictionary without significant overhead. */ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict) -{ - /* pass content and size in case legacy frames are encountered */ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, - NULL, 0, - ddict); -} - - -/*===================================== -* Streaming decompression -*====================================*/ - -ZSTD_DStream* ZSTD_createDStream(void) -{ - DEBUGLOG(3, "ZSTD_createDStream"); - return ZSTD_createDStream_advanced(ZSTDInternalConstants::ZSTD_defaultCMem); -} - -ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) -{ - return ZSTD_initStaticDCtx(workspace, workspaceSize); -} - -ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) -{ - return ZSTD_createDCtx_advanced(customMem); -} - -size_t ZSTD_freeDStream(ZSTD_DStream* zds) -{ - return ZSTD_freeDCtx(zds); -} - - -/* *** Initialization *** */ - -size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTDInternalConstants::ZSTD_blockHeaderSize; } -size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } - -size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - if (dict && dictSize != 0) { - dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); - dctx->ddict = dctx->ddictLocal; - dctx->dictUses = ZSTD_use_indefinitely; - } - return 0; -} - -size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); -} - -size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) -{ - return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); -} - -size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) -{ - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); - dctx->dictUses = ZSTD_use_once; - return 0; -} - -size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) -{ - return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); -} - - -/* ZSTD_initDStream_usingDict() : - * return : expected size, aka ZSTD_startingInputLength(). - * this function cannot fail */ -size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) -{ - DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); - return ZSTD_startingInputLength(zds->format); -} - -/* note : this variant can't fail */ -size_t ZSTD_initDStream(ZSTD_DStream* zds) -{ - DEBUGLOG(4, "ZSTD_initDStream"); - return ZSTD_initDStream_usingDDict(zds, NULL); -} - -/* ZSTD_initDStream_usingDDict() : - * ddict will just be referenced, and must outlive decompression session - * this function cannot fail */ -size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) -{ - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); - return ZSTD_startingInputLength(dctx->format); -} - -/* ZSTD_resetDStream() : - * return : expected size, aka ZSTD_startingInputLength(). - * this function cannot fail */ -size_t ZSTD_resetDStream(ZSTD_DStream* dctx) -{ - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); - return ZSTD_startingInputLength(dctx->format); -} - - -size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - if (ddict) { - dctx->ddict = ddict; - dctx->dictUses = ZSTD_use_indefinitely; - } - return 0; -} - -/* ZSTD_DCtx_setMaxWindowSize() : - * note : no direct equivalence in ZSTD_DCtx_setParameter, - * since this version sets windowSize, and the other sets windowLog */ -size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) -{ - ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); - size_t const min = (size_t)1 << bounds.lowerBound; - size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); - dctx->maxWindowSize = maxWindowSize; - return 0; -} - -size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) -{ - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); -} - -ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) -{ - ZSTD_bounds bounds = { 0, 0, 0 }; - switch(dParam) { - case ZSTD_d_windowLogMax: - bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; - bounds.upperBound = ZSTD_WINDOWLOG_MAX; - return bounds; - case ZSTD_d_format: - bounds.lowerBound = (int)ZSTD_f_zstd1; - bounds.upperBound = (int)ZSTD_f_zstd1_magicless; - ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); - return bounds; - case ZSTD_d_stableOutBuffer: - bounds.lowerBound = (int)ZSTD_obm_buffered; - bounds.upperBound = (int)ZSTD_obm_stable; - return bounds; - default:; - } - bounds.error = ERROR(parameter_unsupported); - return bounds; -} - -/* ZSTD_dParam_withinBounds: - * @return 1 if value is within dParam bounds, - * 0 otherwise */ -static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} - -#define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ -} - -size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) -{ - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - switch(dParam) { - case ZSTD_d_windowLogMax: - if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; - CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); - dctx->maxWindowSize = ((size_t)1) << value; - return 0; - case ZSTD_d_format: - CHECK_DBOUNDS(ZSTD_d_format, value); - dctx->format = (ZSTD_format_e)value; - return 0; - case ZSTD_d_stableOutBuffer: - CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); - dctx->outBufferMode = (ZSTD_outBufferMode_e)value; - return 0; - default:; - } - RETURN_ERROR(parameter_unsupported, ""); -} - -size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) -{ - if ( (reset == ZSTD_reset_session_only) - || (reset == ZSTD_reset_session_and_parameters) ) { - dctx->streamStage = zdss_init; - dctx->noForwardProgress = 0; - } - if ( (reset == ZSTD_reset_parameters) - || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); - ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - } - return 0; -} - - -size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) -{ - return ZSTD_sizeof_DCtx(dctx); -} - -size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) -{ - size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); - unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); - size_t const minRBSize = (size_t) neededSize; - RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge, ""); - return minRBSize; -} - -size_t ZSTD_estimateDStreamSize(size_t windowSize) -{ - size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); - size_t const inBuffSize = blockSize; /* no block can be larger */ - size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); - return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; -} - -size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) -{ - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ - ZSTD_frameHeader zfh; - size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); - if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong, ""); - RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge, ""); - return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); -} - - -/* ***** Decompression ***** */ - -static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) -{ - return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; -} - -static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) -{ - if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) - zds->oversizedDuration++; - else - zds->oversizedDuration = 0; -} - -static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) -{ - return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; -} - -/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ -static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) -{ - ZSTD_outBuffer const expect = zds->expectedOutBuffer; - /* No requirement when ZSTD_obm_stable is not enabled. */ - if (zds->outBufferMode != ZSTD_obm_stable) - return 0; - /* Any buffer is allowed in zdss_init, this must be the same for every other call until - * the context is reset. - */ - if (zds->streamStage == zdss_init) - return 0; - /* The buffer must match our expectation exactly. */ - if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) - return 0; - RETURN_ERROR(dstBuffer_wrong, "ZSTD_obm_stable enabled but output differs!"); -} - -/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() - * and updates the stage and the output buffer state. This call is extracted so it can be - * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. - * NOTE: You must break after calling this function since the streamStage is modified. - */ -static size_t ZSTD_decompressContinueStream( - ZSTD_DStream* zds, char** op, char* oend, - void const* src, size_t srcSize) { - int const isSkipFrame = ZSTD_isSkipFrame(zds); - if (zds->outBufferMode == ZSTD_obm_buffered) { - size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, dstSize, src, srcSize); - FORWARD_IF_ERROR(decodedSize, ""); - if (!decodedSize && !isSkipFrame) { - zds->streamStage = zdss_read; - } else { - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; - } - } else { - /* Write directly into the output buffer */ - size_t const dstSize = isSkipFrame ? 0 : oend - *op; - size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); - FORWARD_IF_ERROR(decodedSize, ""); - *op += decodedSize; - /* Flushing is not needed. */ - zds->streamStage = zdss_read; - assert(*op <= oend); - assert(zds->outBufferMode == ZSTD_obm_stable); - } - return 0; -} - -size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) -{ - const char* const src = (const char*)input->src; - const char* const istart = input->pos != 0 ? src + input->pos : src; - const char* const iend = input->size != 0 ? src + input->size : src; - const char* ip = istart; - char* const dst = (char*)output->dst; - char* const ostart = output->pos != 0 ? dst + output->pos : dst; - char* const oend = output->size != 0 ? dst + output->size : dst; - char* op = ostart; - U32 someMoreWork = 1; - - DEBUGLOG(5, "ZSTD_decompressStream"); - RETURN_ERROR_IF( - input->pos > input->size, - srcSize_wrong, - "forbidden. in: pos: %u vs size: %u", - (U32)input->pos, (U32)input->size); - RETURN_ERROR_IF( - output->pos > output->size, - dstSize_tooSmall, - "forbidden. out: pos: %u vs size: %u", - (U32)output->pos, (U32)output->size); - DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); - FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); - - while (someMoreWork) { - switch(zds->streamStage) - { - case zdss_init : - DEBUGLOG(5, "stage zdss_init => transparent reset "); - zds->streamStage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; - zds->hostageByte = 0; - zds->expectedOutBuffer = *output; - /* fall-through */ - - case zdss_loadHeader : - DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - if (zds->legacyVersion) { - RETURN_ERROR_IF(zds->staticSize, memory_allocation, - "legacy support is incompatible with static dctx"); - { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); - if (hint==0) zds->streamStage = zdss_init; - return hint; - } } -#endif - { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); - DEBUGLOG(5, "header size : %u", (U32)hSize); - if (ZSTD_isError(hSize)) { -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) - U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); - if (legacyVersion) { - ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); - const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; - size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; - DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); - RETURN_ERROR_IF(zds->staticSize, memory_allocation, - "legacy support is incompatible with static dctx"); - FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, - zds->previousLegacyVersion, legacyVersion, - dict, dictSize), ""); - zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; - { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); - if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ - return hint; - } } -#endif - return hSize; /* error */ - } - if (hSize != 0) { /* need more input */ - size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - size_t const remainingInput = (size_t)(iend-ip); - assert(iend >= ip); - if (toLoad > remainingInput) { /* not enough input to load full header */ - if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); - zds->lhSize += remainingInput; - } - input->pos = input->size; - return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTDInternalConstants::ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; - break; - } } - - /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && zds->fParams.frameType != ZSTD_skippableFrame - && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); - if (cSize <= (size_t)(iend-istart)) { - /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); - if (ZSTD_isError(decompressedSize)) return decompressedSize; - DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") - ip = istart + cSize; - op += decompressedSize; - zds->expected = 0; - zds->streamStage = zdss_init; - someMoreWork = 0; - break; - } } - - /* Check output buffer is large enough for ZSTD_odm_stable. */ - if (zds->outBufferMode == ZSTD_obm_stable - && zds->fParams.frameType != ZSTD_skippableFrame - && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN - && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { - RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); - } - - /* Consume header (see ZSTDds_decodeFrameHeader) */ - DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); - - if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); - zds->stage = ZSTDds_skipFrame; - } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); - zds->expected = ZSTDInternalConstants::ZSTD_blockHeaderSize; - zds->stage = ZSTDds_decodeBlockHeader; - } - - /* control buffer memory usage */ - DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", - (U32)(zds->fParams.windowSize >>10), - (U32)(zds->maxWindowSize >> 10) ); - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge, ""); - - /* Adapt buffer sizes to frame header instructions */ - { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_obm_buffered - ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) - : 0; - - ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); - - { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); - int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); - - if (tooSmall || tooLarge) { - size_t const bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation, ""); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } } - zds->streamStage = zdss_read; - /* fall-through */ - - case zdss_read: - DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); - DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); - if (neededInSize==0) { /* end of frame */ - zds->streamStage = zdss_init; - someMoreWork = 0; - break; - } - if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); - ip += neededInSize; - /* Function modifies the stage so we must break */ - break; - } } - if (ip==iend) { someMoreWork = 0; break; } /* no more input */ - zds->streamStage = zdss_load; - /* fall-through */ - - case zdss_load: - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); - size_t const toLoad = neededInSize - zds->inPos; - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t loadedSize; - /* At this point we shouldn't be decompressing a block that we can stream. */ - assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); - if (isSkipFrame) { - loadedSize = MIN(toLoad, (size_t)(iend-ip)); - } else { - RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, - corruption_detected, - "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); - } - ip += loadedSize; - zds->inPos += loadedSize; - if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ - - /* decode loaded input */ - zds->inPos = 0; /* input is consumed */ - FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); - /* Function modifies the stage so we must break */ - break; - } - case zdss_flush: - { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); - op += flushedSize; - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->streamStage = zdss_read; - if ( (zds->outBuffSize < zds->fParams.frameContentSize) - && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { - DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", - (int)(zds->outBuffSize - zds->outStart), - (U32)zds->fParams.blockSizeMax); - zds->outStart = zds->outEnd = 0; - } - break; - } } - /* cannot complete flush */ - someMoreWork = 0; - break; - - default: - assert(0); /* impossible */ - RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ - } } - - /* result */ - input->pos = (size_t)(ip - (const char*)(input->src)); - output->pos = (size_t)(op - (char*)(output->dst)); - - /* Update the expected output buffer for ZSTD_obm_stable. */ - zds->expectedOutBuffer = *output; - - if ((ip==istart) && (op==ostart)) { /* no forward progress */ - zds->noForwardProgress ++; - if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); - RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); - assert(0); - } - } else { - zds->noForwardProgress = 0; - } - { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); - if (!nextSrcSizeHint) { /* frame fully decoded */ - if (zds->outEnd == zds->outStart) { /* output fully flushed */ - if (zds->hostageByte) { - if (input->pos >= input->size) { - /* can't release hostage (not present) */ - zds->streamStage = zdss_read; - return 1; - } - input->pos++; /* release hostage */ - } /* zds->hostageByte */ - return 0; - } /* zds->outEnd == zds->outStart */ - if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ - input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ - zds->hostageByte=1; - } - return 1; - } /* nextSrcSizeHint==0 */ - nextSrcSizeHint += ZSTDInternalConstants::ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ - assert(zds->inPos <= nextSrcSizeHint); - nextSrcSizeHint -= zds->inPos; /* part already loaded*/ - return nextSrcSizeHint; - } -} - -size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos) -{ - ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; - ZSTD_inBuffer input = { src, srcSize, *srcPos }; - /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ - size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); - *dstPos = output.pos; - *srcPos = input.pos; - return cErr; -} - -} diff --git a/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp b/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp deleted file mode 100644 index f7574918e..000000000 --- a/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +++ /dev/null @@ -1,1418 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* zstd_decompress_block : - * this module takes care of decompressing _compressed_ block */ - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ -#include "zstd/common/compiler.h" /* prefetch */ -#include "zstd/common/mem.h" /* low level memory routines */ -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/common/zstd_internal.h" -#include "zstd/decompress/zstd_decompress_internal.h" /* ZSTD_DCtx */ -#include "zstd/decompress/zstd_ddict.h" /* ZSTD_DDictDictContent */ -#include "zstd/decompress/zstd_decompress_block.h" -namespace duckdb_zstd { -/*_******************************************************* -* Macros -**********************************************************/ - -/* These two optional macros force the use one way or another of the two - * ZSTD_decompressSequences implementations. You can't force in both directions - * at the same time. - */ -#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" -#endif - - -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } - - -/*-************************************************************* - * Block decoding - ***************************************************************/ - -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr) -{ - RETURN_ERROR_IF(srcSize < ZSTDInternalConstants::ZSTD_blockHeaderSize, srcSize_wrong, ""); - - { U32 const cBlockHeader = MEM_readLE24(src); - U32 const cSize = cBlockHeader >> 3; - bpPtr->lastBlock = cBlockHeader & 1; - bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); - bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); - return cSize; - } -} - - -/* Hidden declaration for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize); -/*! ZSTD_decodeLiteralsBlock() : - * @return : nb of bytes read from src (< srcSize ) - * note : symbol not declared but exposed for fullbench */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, - const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -{ - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); - - { const BYTE* const istart = (const BYTE*) src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); - - switch(litEncType) - { - case set_repeat: - DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); - /* fall-through */ - - case set_compressed: - RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); - { size_t lhSize, litSize, litCSize; - U32 singleStream=0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = MEM_readLE32(istart); - size_t hufSuccess; - switch(lhlCode) - { - case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); - - /* prefetch huffman table if cold */ - if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { - PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); - } - - if (litEncType==set_repeat) { - if (singleStream) { - hufSuccess = HUF_decompress1X_usingDTable_bmi2( - dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); - } else { - hufSuccess = HUF_decompress4X_usingDTable_bmi2( - dctx->litBuffer, litSize, istart+lhSize, litCSize, - dctx->HUFptr, dctx->bmi2); - } - } else { - if (singleStream) { -#if defined(HUF_FORCE_DECOMPRESS_X2) - hufSuccess = HUF_decompress1X_DCtx_wksp( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace)); -#else - hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); -#endif - } else { - hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( - dctx->entropy.hufTable, dctx->litBuffer, litSize, - istart+lhSize, litCSize, dctx->workspace, - sizeof(dctx->workspace), dctx->bmi2); - } - } - - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); - - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - dctx->litEntropy = 1; - if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return litCSize + lhSize; - } - - case set_basic: - { size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - break; - } - - if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); - memcpy(dctx->litBuffer, istart+lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return lhSize+litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart+lhSize; - dctx->litSize = litSize; - return lhSize+litSize; - } - - case set_rle: - { U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; - switch(lhlCode) - { - case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = MEM_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = MEM_readLE24(istart) >> 4; - RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); - break; - } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - return lhSize+1; - } - default: - RETURN_ERROR(corruption_detected, "impossible"); - } - } -} - -/* Default FSE distribution tables. - * These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions - * They were generated programmatically with following method : - * - start from default distributions, present in /lib/common/zstd_internal.h - * - generate tables normally, using ZSTD_buildFSETable() - * - printout the content of tables - * - pretify output, report below, test with fuzzer to ensure it's correct */ - -/* Default FSE distribution table for Literal Lengths */ -static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; - DTableH->fastMode = 0; - - cell->nbBits = 0; - cell->nextState = 0; - assert(nbAddBits < 255); - cell->nbAdditionalBits = (BYTE)nbAddBits; - cell->baseValue = baseValue; -} - - -/* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) - * cannot fail if input is valid => - * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) -{ - ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; - - /* Sanity Checks */ - assert(maxSymbolValue <= MaxSeq); - assert(tableLog <= MaxFSELog); - - /* Init, lay down lowprob symbols */ - { ZSTD_seqSymbol_header DTableH; - DTableH.tableLog = tableLog; - DTableH.fastMode = 1; - { S16 const largeLimit= (S16)(1 << (tableLog-1)); - U32 s; - for (s=0; s= largeLimit) DTableH.fastMode=0; - assert(normalizedCounter[s]>=0); - symbolNext[s] = (U16)normalizedCounter[s]; - } } } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - - /* Spread symbols */ - { U32 const tableMask = tableSize-1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ - } } - assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } - - /* Build Decoding table */ - { U32 u; - for (u=0; u max, corruption_detected, ""); - { U32 const symbol = *(const BYTE*)src; - U32 const baseline = baseValue[symbol]; - U32 const nbBits = nbAdditionalBits[symbol]; - ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); - } - *DTablePtr = DTableSpace; - return 1; - case set_basic : - *DTablePtr = defaultTable; - return 0; - case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); - /* prefetch FSE table if used */ - if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { - const void* const pStart = *DTablePtr; - size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); - PREFETCH_AREA(pStart, pSize); - } - return 0; - case set_compressed : - { unsigned tableLog; - S16 norm[MaxSeq+1]; - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); - *DTablePtr = DTableSpace; - return headerSize; - } - default : - assert(0); - RETURN_ERROR(GENERIC, "impossible"); - } -} - -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize) -{ - const BYTE* const istart = (const BYTE* const)src; - const BYTE* const iend = istart + srcSize; - const BYTE* ip = istart; - int nbSeq; - DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); - - /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); - - /* SeqHead */ - nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); - return 1; - } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; - } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); - nbSeq = ((nbSeq-0x80)<<8) + *ip++; - } - } - *nbSeqPtr = nbSeq; - - /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ - { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); - ip++; - - /* Build DTables */ - { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, - LLtype, MaxLL, LLFSELog, - ip, iend-ip, - ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits, - LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += llhSize; - } - - { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, - OFtype, MaxOff, OffFSELog, - ip, iend-ip, - ZSTDConstants::OF_base, ZSTDConstants::OF_bits, - OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += ofhSize; - } - - { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, - MLtype, MaxML, MLFSELog, - ip, iend-ip, - ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits, - ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); - ip += mlhSize; - } - } - - return ip-istart; -} - - -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; - const BYTE* match; -} seq_t; - -typedef struct { - size_t state; - const ZSTD_seqSymbol* table; -} ZSTD_fseState; - -typedef struct { - BIT_DStream_t DStream; - ZSTD_fseState stateLL; - ZSTD_fseState stateOffb; - ZSTD_fseState stateML; - size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; -} seqState_t; - -/*! ZSTD_overlapCopy8() : - * Copies 8 bytes from ip to op and updates op and ip where ip <= op. - * If the offset is < 8 then the offset is spread to at least 8 bytes. - * - * Precondition: *ip <= *op - * Postcondition: *op - *op >= 8 - */ -HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { - assert(*ip <= *op); - if (offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ - static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ - int const sub2 = dec64table[offset]; - (*op)[0] = (*ip)[0]; - (*op)[1] = (*ip)[1]; - (*op)[2] = (*ip)[2]; - (*op)[3] = (*ip)[3]; - *ip += dec32table[offset]; - ZSTD_copy4(*op+4, *ip); - *ip -= sub2; - } else { - ZSTD_copy8(*op, *ip); - } - *ip += 8; - *op += 8; - assert(*op - *ip >= 8); -} - -/*! ZSTD_safecopy() : - * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer - * and write up to 16 bytes past oend_w (op >= oend_w is allowed). - * This function is only called in the uncommon case where the sequence is near the end of the block. It - * should be fast for a single long sequence, but can be slow for several short sequences. - * - * @param ovtype controls the overlap detection - * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. - * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. - * The src buffer must be before the dst buffer. - */ -static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { - ptrdiff_t const diff = op - ip; - BYTE* const oend = op + length; - - assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || - (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); - - if (length < 8) { - /* Handle short lengths. */ - while (op < oend) *op++ = *ip++; - return; - } - if (ovtype == ZSTD_overlap_src_before_dst) { - /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ - assert(length >= 8); - ZSTD_overlapCopy8(&op, &ip, diff); - assert(op - ip >= 8); - assert(op <= oend); - } - - if (oend <= oend_w) { - /* No risk of overwrite. */ - ZSTD_wildcopy(op, ip, length, ovtype); - return; - } - if (op <= oend_w) { - /* Wildcopy until we get close to the end. */ - assert(oend > oend_w); - ZSTD_wildcopy(op, ip, oend_w - op, ovtype); - ip += oend_w - op; - op = oend_w; - } - /* Handle the leftovers. */ - while (op < oend) *op++ = *ip++; -} - -/* ZSTD_execSequenceEnd(): - * This version handles cases that are near the end of the output buffer. It requires - * more careful checks to make sure there is no overflow. By separating out these hard - * and unlikely cases, we can speed up the common cases. - * - * NOTE: This function needs to be fast for a single long sequence, but doesn't need - * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). - */ -FORCE_NOINLINE -size_t ZSTD_execSequenceEnd(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - - /* bounds checks : careful of address space overflow in 32-bit mode */ - RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); - assert(op < op + sequenceLength); - assert(oLitEnd < op + sequenceLength); - - /* copy literals */ - ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); - op = oLitEnd; - *litPtr = iLitEnd; - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); - match = dictEnd - (prefixStart-match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } - ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); - return sequenceLength; -} - -HINT_INLINE -size_t ZSTD_execSequence(BYTE* op, - BYTE* const oend, seq_t sequence, - const BYTE** litPtr, const BYTE* const litLimit, - const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) -{ - BYTE* const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ - const BYTE* const iLitEnd = *litPtr + sequence.litLength; - const BYTE* match = oLitEnd - sequence.offset; - - assert(op != NULL /* Precondition */); - assert(oend_w < oend /* No underflow */); - /* Handle edge cases in a slow path: - * - Read beyond end of literals - * - Match end is within WILDCOPY_OVERLIMIT of oend - * - 32-bit mode and the match length overflows - */ - if (UNLIKELY( - iLitEnd > litLimit || - oMatchEnd > oend_w || - (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) - return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); - - /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ - assert(op <= oLitEnd /* No overflow */); - assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); - assert(oMatchEnd <= oend /* No underflow */); - assert(iLitEnd <= litLimit /* Literal length is in bounds */); - assert(oLitEnd <= oend_w /* Can wildcopy literals */); - assert(oMatchEnd <= oend_w /* Can wildcopy matches */); - - /* Copy Literals: - * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. - * We likely don't need the full 32-byte wildcopy. - */ - assert(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(op, (*litPtr)); - if (UNLIKELY(sequence.litLength > 16)) { - ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); - } - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* Copy Match */ - if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { - /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); - match = dictEnd + (match - prefixStart); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currentPrefixSegment */ - { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = prefixStart; - } } - /* Match within prefix of 1 or more bytes */ - assert(op <= oMatchEnd); - assert(oMatchEnd <= oend_w); - assert(match >= prefixStart); - assert(sequence.matchLength >= 1); - - /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy - * without overlap checking. - */ - if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { - /* We bet on a full wildcopy for matches, since we expect matches to be - * longer than literals (in general). In silesia, ~10% of matches are longer - * than 16 bytes. - */ - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); - return sequenceLength; - } - assert(sequence.offset < WILDCOPY_VECLEN); - - /* Copy 8 bytes and spread the offset to be >= 8. */ - ZSTD_overlapCopy8(&op, &match, sequence.offset); - - /* If the match length is > 8 bytes, then continue with the wildcopy. */ - if (sequence.matchLength > 8) { - assert(op < oMatchEnd); - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); - } - return sequenceLength; -} - -static void -ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) -{ - const void* ptr = dt; - const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", - (U32)DStatePtr->state, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) -{ - ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -FORCE_INLINE_TEMPLATE void -ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) -{ - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.nextState + lowBits; -} - -/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum - * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) - * bits before reloading. This value is the maximum number of bytes we read - * after reloading when we are decoding long offsets. - */ -#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ - (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ - ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ - : 0) - -typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; - -FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) -{ - seq_t seq; - ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; - ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; - ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; - U32 const llBase = llDInfo.baseValue; - U32 const mlBase = mlDInfo.baseValue; - U32 const ofBase = ofDInfo.baseValue; - BYTE const llBits = llDInfo.nbAdditionalBits; - BYTE const mlBits = mlDInfo.nbAdditionalBits; - BYTE const ofBits = ofDInfo.nbAdditionalBits; - BYTE const totalBits = llBits+mlBits+ofBits; - - /* sequence */ - { size_t offset; - if (ofBits > 1) { - ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); - ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); - assert(ofBits <= MaxOff); - if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { - U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); - offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - BIT_reloadDStream(&seqState->DStream); - if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); - assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ - } else { - offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); - } - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } else { - U32 const ll0 = (llBase == 0); - if (LIKELY((ofBits == 0))) { - if (LIKELY(!ll0)) - offset = seqState->prevOffset[0]; - else { - offset = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - } else { - offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); - { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } } } - seq.offset = offset; - } - - seq.matchLength = mlBase; - if (mlBits > 0) - seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); - - if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) - BIT_reloadDStream(&seqState->DStream); - if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ - ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); - - seq.litLength = llBase; - if (llBits > 0) - seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); - - if (MEM_32bits()) - BIT_reloadDStream(&seqState->DStream); - - DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. - * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update - * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). - * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). - * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the - * better option, so it is the default for other compilers. But, if you - * measure that it is worse, please put up a pull request. - */ - { -#if defined(__GNUC__) && !defined(__clang__) - const int kUseUpdateFseState = 1; -#else - const int kUseUpdateFseState = 0; -#endif - if (kUseUpdateFseState) { - ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - } else { - ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ - if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ - } - } - - return seq; -} - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) -{ - size_t const windowSize = dctx->fParams.windowSize; - /* No dictionary used. */ - if (dctx->dictContentEndForFuzzing == NULL) return 0; - /* Dictionary is our prefix. */ - if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; - /* Dictionary is not our ext-dict. */ - if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; - /* Dictionary is not within our window size. */ - if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; - /* Dictionary is active. */ - return 1; -} - -MEM_STATIC void ZSTD_assertValidSequence( - ZSTD_DCtx const* dctx, - BYTE const* op, BYTE const* oend, - seq_t const seq, - BYTE const* prefixStart, BYTE const* virtualStart) -{ - size_t const windowSize = dctx->fParams.windowSize; - size_t const sequenceSize = seq.litLength + seq.matchLength; - BYTE const* const oLitEnd = op + seq.litLength; - DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", - (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - assert(op <= oend); - assert((size_t)(oend - op) >= sequenceSize); - assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); - if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { - size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); - /* Offset must be within the dictionary. */ - assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); - assert(seq.offset <= windowSize + dictSize); - } else { - /* Offset must be within our window. */ - assert(seq.offset <= windowSize); - } -} -#endif - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -FORCE_INLINE_TEMPLATE size_t -DONT_VECTORIZE -ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - DEBUGLOG(5, "ZSTD_decompressSequences_body"); - (void)frame; - - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - size_t error = 0; - dctx->fseEntropy = 1; - { U32 i; for (i=0; ientropy.rep[i]; } - RETURN_ERROR_IF( - ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected, ""); - ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - assert(dst != NULL); - - ZSTD_STATIC_ASSERT( - BIT_DStream_unfinished < BIT_DStream_completed && - BIT_DStream_endOfBuffer < BIT_DStream_completed && - BIT_DStream_completed < BIT_DStream_overflow); - -#if defined(__GNUC__) && defined(__x86_64__) - /* Align the decompression loop to 32 + 16 bytes. - * - * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression - * speed swings based on the alignment of the decompression loop. This - * performance swing is caused by parts of the decompression loop falling - * out of the DSB. The entire decompression loop should fit in the DSB, - * when it can't we get much worse performance. You can measure if you've - * hit the good case or the bad case with this perf command for some - * compressed file test.zst: - * - * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ - * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst - * - * If you see most cycles served out of the MITE you've hit the bad case. - * If you see most cycles served out of the DSB you've hit the good case. - * If it is pretty even then you may be in an okay case. - * - * I've been able to reproduce this issue on the following CPUs: - * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 - * Use Instruments->Counters to get DSB/MITE cycles. - * I never got performance swings, but I was able to - * go from the good case of mostly DSB to half of the - * cycles served from MITE. - * - Coffeelake: Intel i9-9900k - * - * I haven't been able to reproduce the instability or DSB misses on any - * of the following CPUS: - * - Haswell - * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH - * - Skylake - * - * If you are seeing performance stability this script can help test. - * It tests on 4 commits in zstd where I saw performance change. - * - * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 - */ - __asm__(".p2align 5"); - __asm__("nop"); - __asm__(".p2align 4"); -#endif - for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); -#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) - assert(!ZSTD_isError(oneSeqSize)); - if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); -#endif - DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); - BIT_reloadDStream(&(seqState.DStream)); - /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. - * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) break; - } - - /* check if reached exact end */ - DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; - RETURN_ERROR_IF(nbSeq, corruption_detected, ""); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); - /* save reps for next block */ - { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - } - - return op-ostart; -} - -static size_t -ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -FORCE_INLINE_TEMPLATE size_t -ZSTD_decompressSequencesLong_body( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - const BYTE* ip = (const BYTE*)seqStart; - const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; - BYTE* const oend = ostart + maxDstSize; - BYTE* op = ostart; - const BYTE* litPtr = dctx->litPtr; - const BYTE* const litEnd = litPtr + dctx->litSize; - const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); - const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); - const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); - (void)frame; - - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 - seq_t sequences[STORED_SEQS]; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - dctx->fseEntropy = 1; - { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; - assert(dst != NULL); - assert(iend >= ip); - RETURN_ERROR_IF( - ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected, ""); - ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } - } - - /* last literal segment */ - { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); - if (op != NULL) { - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - } - - return op-ostart; -} - -static size_t -ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - - -#if DYNAMIC_BMI2 - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - -#endif /* DYNAMIC_BMI2 */ - -typedef size_t (*ZSTD_decompressSequences_t)( - ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame); - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG -static size_t -ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - DEBUGLOG(5, "ZSTD_decompressSequences"); - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ - - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT -/* ZSTD_decompressSequencesLong() : - * decompression function triggered when a minimum share of offsets is considered "long", - * aka out of cache. - * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". - * This function will try to mitigate main memory latency through the use of prefetching */ -static size_t -ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, - void* dst, size_t maxDstSize, - const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset, - const int frame) -{ - DEBUGLOG(5, "ZSTD_decompressSequencesLong"); -#if DYNAMIC_BMI2 - if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); - } -#endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); -} -#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ - - - -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) -/* ZSTD_getLongOffsetsShare() : - * condition : offTable must be valid - * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) - * compared to maximum possible of (1< 22) total += 1; - } - - assert(tableLog <= OffFSELog); - total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ - - return total; -} -#endif - -size_t -ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame) -{ /* blockType == blockCompressed */ - const BYTE* ip = (const BYTE*)src; - /* isLongOffset must be true if there are long offsets. - * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. - * We don't expect that to be the case in 64-bit mode. - * In block mode, window size is not known, so we have to be conservative. - * (note: but it could be evaluated from current-lowLimit) - */ - ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); - DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); - - /* Decode literals section */ - { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); - if (ZSTD_isError(litCSize)) return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - - /* Build Decoding Tables */ - { - /* These macros control at build-time which decompressor implementation - * we use. If neither is defined, we do some inspection and dispatch at - * runtime. - */ -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - int usePrefetchDecoder = dctx->ddictIsCold; -#endif - int nbSeq; - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); - if (ZSTD_isError(seqHSize)) return seqHSize; - ip += seqHSize; - srcSize -= seqHSize; - - RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); - -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - if ( !usePrefetchDecoder - && (!frame || (dctx->fParams.windowSize > (1<<24))) - && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ - U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); - U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ - usePrefetchDecoder = (shareLongOffsets >= minShare); - } -#endif - - dctx->ddictIsCold = 0; - -#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ - !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) - if (usePrefetchDecoder) -#endif -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -#endif - -#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG - /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); -#endif - } -} - - -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); - dctx->prefixStart = dst; - dctx->previousDstEnd = dst; - } -} - - -size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) -{ - size_t dSize; - ZSTD_checkContinuity(dctx, dst); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); - dctx->previousDstEnd = (char*)dst + dSize; - return dSize; -} - -} \ No newline at end of file diff --git a/src/duckdb/third_party/zstd/include/zstd.h b/src/duckdb/third_party/zstd/include/zstd.h deleted file mode 100644 index ade94c2d5..000000000 --- a/src/duckdb/third_party/zstd/include/zstd.h +++ /dev/null @@ -1,1015 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 - -/* ====== Dependency ======*/ -#include /* INT_MAX */ -#include /* size_t */ - - -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDLIB_API ZSTDLIB_VISIBILITY -#endif - -namespace duckdb_zstd { - -/******************************************************************************* - Introduction - - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting - real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression - functions. - - The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - which is currently 22. Levels >= 20, labeled `--ultra`, should be used with - caution, as they require more memory. The library also offers negative - compression levels, which extend the range of speed vs. ratio preferences. - The lower the level, the faster the speed (at the cost of compression). - - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit context) - - unbounded multiple steps (described as Streaming compression) - - The compression ratio achievable on small data can be highly improved using - a dictionary. Dictionary compression can be performed in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing - dictionary API) - - Advanced experimental functions can be accessed using - `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. - - Advanced experimental APIs should never be used with a dynamically-linked - library. They are not "stable"; their definitions or signatures may change in - the future. Only static linking is allowed. -*******************************************************************************/ - -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 5 - -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ - -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ - -/* ************************************* - * Default constant - ***************************************/ -#ifndef ZSTD_CLEVEL_DEFAULT -# define ZSTD_CLEVEL_DEFAULT 3 -#endif - -/* ************************************* - * Constants - ***************************************/ - -/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ -#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 - -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); - -/*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); - -/*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). - * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); - -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ -ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); - -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD frame or skippable frame. - * `srcSize` must be >= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); - - -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ - - -/*************************************** -* Explicit context -***************************************/ -/*= Compression context - * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. - */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); - -/*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . - * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. - */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); - -/*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); - -/*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. - */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - - -/*************************************** -* Advanced compression API -***************************************/ - -/* API design : - * Parameters are pushed one by one into an existing context, - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . - * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). - * - * This API supercedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. - */ - - -/* Compression strategies, listed from fastest to strongest */ -typedef enum { ZSTD_fast=1, - ZSTD_dfast=2, - ZSTD_greedy=3, - ZSTD_lazy=4, - ZSTD_lazy2=5, - ZSTD_btlazy2=6, - ZSTD_btopt=7, - ZSTD_btultra=8, - ZSTD_btultra2=9 - /* note : new strategies _might_ be added in the future. - Only the order (from fast to strong) is guaranteed */ -} ZSTD_strategy; - - -typedef enum { - - /* compression parameters - * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. - * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. - * Note that exact compression parameters are dynamically determined, - * depending on both compression level and srcSize (when known). - * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. - * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level does not automatically set all other compression parameters - * to default. Setting this will however eventually dynamically impact the compression - * parameters which have not been manually set. The manually set - * ones will 'stick'. */ - /* Advanced compression parameters : - * It's possible to pin down compression parameters to some specific values. - * In which case, these values are no longer dynamically selected by the compressor */ - ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. - * This will set a memory budget for streaming decompression, - * with larger values requiring more memory - * and typically compressing more. - * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "use default windowLog". - * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ - ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. - * Resulting memory usage is (1 << (hashLog+2)). - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. - * Larger tables improve compression ratio of strategies <= dFast, - * and improve speed of strategies > dFast. - * Special: value 0 means "use default hashLog". */ - ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. - * Resulting memory usage is (1 << (chainLog+2)). - * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. - * Larger tables result in better and slower compression. - * This parameter is useless for "fast" strategy. - * It's still useful when using "dfast" strategy, - * in which case it defines a secondary probe table. - * Special: value 0 means "use default chainLog". */ - ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. - * More attempts result in better and slower compression. - * This parameter is useless for "fast" and "dFast" strategies. - * Special: value 0 means "use default searchLog". */ - ZSTD_c_minMatch=105, /* Minimum size of searched matches. - * Note that Zstandard can still find matches of smaller size, - * it just tweaks its search algorithm to look for this size and larger. - * Larger values increase compression and decompression speed, but decrease ratio. - * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. - * Note that currently, for all strategies < btopt, effective minimum is 4. - * , for all strategies > fast, effective maximum is 6. - * Special: value 0 means "use default minMatchLength". */ - ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. - * For strategies btopt, btultra & btultra2: - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger, and slower. - * For strategy fast: - * Distance between match sampling. - * Larger values make compression faster, and weaker. - * Special: value 0 means "use default targetLength". */ - ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ - - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio - * for large inputs, by finding large matches at long distance. - * It increases memory usage and window size. - * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. */ - ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, - * but decrease compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * default: windowlog - 7. - * Special: value 0 means "automatically determine hashlog". */ - ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. - * Special: value 0 means "use default value" (default: 64). */ - ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values improve collision resolution but decrease compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. - * Special: value 0 means "use default value" (default: 3). */ - ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). - * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. - * Larger values improve compression speed. - * Deviating far from default value will likely result in a compression ratio decrease. - * Special: value 0 means "automatically determine hashRateLog". */ - - /* frame parameters */ - ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression. - * This is automatically the case when using ZSTD_compress2(), - * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ - ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ - ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ - - /* multi-threading parameters */ - /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). - * They return an error otherwise. */ - ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : - * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression work is performed in parallel, within worker threads. - * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : - * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). - * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ - ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. - * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. - * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. - * The minimum size is automatically and transparently enforced. */ - ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. - * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. - * It helps preserve compression ratio, while each job is compressed in parallel. - * This value is enforced only when nbWorkers >= 1. - * Larger values increase compression ratio, but decrease speed. - * Possible values range from 0 to 9 : - * - 0 means "default" : value will be determined by the library, depending on strategy - * - 1 means "no overlap" - * - 9 means "full overlap", using a full window size. - * Each intermediate rank increases/decreases load size by a factor 2 : - * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default - * default value varies between 6 and 9, depending on strategy */ - - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_c_rsyncable - * ZSTD_c_format - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. - */ - ZSTD_c_experimentalParam1=500, - ZSTD_c_experimentalParam2=10, - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 -} ZSTD_cParameter; - -typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; - -/*! ZSTD_cParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - lower and upper bounds, both inclusive - */ -ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); - -/*! ZSTD_CCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_cParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is generally only possible during frame initialization (before starting compression). - * Exception : when using multi-threading mode (nbWorkers >= 1), - * the following parameters can be updated _during_ compression (within same frame): - * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - * new parameters will be active for next job only (after a flush()). - * @return : an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); - -/*! ZSTD_CCtx_setPledgedSrcSize() : - * Total input data size to be compressed as a single frame. - * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. - * This value will also be controlled at end of frame, and trigger an error if not respected. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. - * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. - * Note 2 : pledgedSrcSize is only valid once, for the next frame. - * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. - * Note 3 : Whenever all input data is provided and consumed in a single round, - * for example with ZSTD_compress2(), - * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), - * this value is automatically overridden by srcSize instead. - */ -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); - -typedef enum { - ZSTD_reset_session_only = 1, - ZSTD_reset_parameters = 2, - ZSTD_reset_session_and_parameters = 3 -} ZSTD_ResetDirective; - -/*! ZSTD_CCtx_reset() : - * There are 2 different things that can be reset, independently or jointly : - * - The session : will stop compressing current frame, and make CCtx ready to start a new one. - * Useful after an error, or to interrupt any ongoing compression. - * Any internal data not yet flushed is cancelled. - * Compression parameters and dictionary remain unchanged. - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. - */ -ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); - -/*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - - -/*************************************** -* Advanced decompression API -***************************************/ - -/* The advanced API pushes parameters one by one into an existing DCtx context. - * Parameters are sticky, and remain valid for all following frames - * using the same DCtx context. - * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). - * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). - * Therefore, no new decompression function is necessary. - */ - -typedef enum { - - ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). - * Special: value 0 means "use default maximum windowLog". */ - - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_d_format - * ZSTD_d_stableOutBuffer - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000, - ZSTD_d_experimentalParam2=1001 - -} ZSTD_dParameter; - -/*! ZSTD_dParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - both lower and upper bounds, inclusive - */ -ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); - -/*! ZSTD_DCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_dParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is only possible during frame initialization (before starting decompression). - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); - -/*! ZSTD_DCtx_reset() : - * Return a DCtx to clean state. - * Session and parameters can be reset jointly or separately. - * Parameters can only be reset when no active frame is being decompressed. - * @return : 0, or an error code, which can be tested with ZSTD_isError() - */ -ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); - - -/**************************** -* Streaming -****************************/ - -typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_inBuffer; - -typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_outBuffer; - - - -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -* -* For parallel execution, use one separate ZSTD_CStream per thread. -* -* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. -* -* Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. -* When in doubt, it's recommended to fully initialize the context before usage. -* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), -* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -* set more specific parameters, the pledged source size, or load a dictionary. -* -* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to -* consume input stream. The function will automatically update both `pos` -* fields within `input` and `output`. -* Note that the function may not consume the entire input, for example, because -* the output buffer is already full, in which case `input.pos < input.size`. -* The caller must check if input has been entirely consumed. -* If not, the caller must make some room to receive more compressed data, -* and then present again remaining input data. -* note: ZSTD_e_continue is guaranteed to make some forward progress when called, -* but doesn't guarantee maximal forward progress. This is especially relevant -* when compressing with multiple threads. The call won't block if it can -* consume some input, but if it can't it will wait for some, but not all, -* output to be flushed. -* @return : provides a minimum amount of data remaining to be flushed from internal buffers -* or an error code, which can be tested using ZSTD_isError(). -* -* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, -* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. -* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). -* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the -* operation. -* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if internal buffers are entirely flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to -* start a new frame. -* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if frame fully completed and fully flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* *******************************************************************/ - -typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ -/*===== ZSTD_CStream management functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); - -/*===== Streaming compression functions =====*/ -typedef enum { - ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ - ZSTD_e_flush=1, /* flush any data provided so far, - * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. - * note : multithreaded compression will block to flush as much output as possible. */ - ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. - * note that frame is only closed after compressed data is fully flushed (return value == 0). - * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). - : note : multithreaded compression will block to flush as much output as possible. */ -} ZSTD_EndDirective; - -/*! ZSTD_compressStream2() : - * Behaves about the same as ZSTD_compressStream, with additional control on end directive. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) - * - output->pos must be <= dstCapacity, input->pos must be <= srcSize - * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. - * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, - * and then immediately returns, just indicating that there is some data remaining to be flushed. - * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. - * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. - * - @return provides a minimum amount of data remaining to be flushed from internal buffers - * or an error code, which can be tested using ZSTD_isError(). - * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. - * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. - * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. - * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); - - -/* These buffer sizes are softly recommended. - * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. - * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), - * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. - * - * However, note that these recommendations are from the perspective of a C caller program. - * If the streaming interface is invoked from some other language, - * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, - * a major performance rule is to reduce crossing such interface to an absolute minimum. - * It's not rare that performance ends being spent more into the interface, rather than compression itself. - * In which cases, prefer using large buffers, as large as practical, - * for both input and output, to reduce the nb of roundtrips. - */ -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ - - -/* ***************************************************************************** - * This following is a legacy streaming API. - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. - ******************************************************************************/ - -/*! - * Equivalent to: - * - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - */ -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/*! - * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). - * NOTE: The return value is different. ZSTD_compressStream() returns a hint for - * the next read size (if non-zero and not an error). ZSTD_compressStream2() - * returns the minimum nb of bytes left to flush (if non-zero and not an error). - */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); - - -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation. -* @return : recommended first input size -* Alternatively, use advanced API to set specific properties. -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* The function tries to flush all data decoded immediately, respecting output buffer size. -* If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., -* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. -* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. -* @return : 0 when a frame is completely decoded and fully flushed, -* or an error code, which can be tested using ZSTD_isError(), -* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : -* the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. -* *******************************************************************************/ - -typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ -/*===== ZSTD_DStream management functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); - -/*===== Streaming decompression functions =====*/ - -/* This function is redundant with the advanced API and equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); - */ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); - -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ - - -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : - * Compression at an explicit compression level using a Dictionary. - * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dict/zdict.h). - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); - -/*! ZSTD_decompress_usingDict() : - * Decompression using a known Dictionary. - * Dictionary must be identical to the one used during compression. - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize); - - -/*********************************** - * Bulk processing dictionary API - **********************************/ -typedef struct ZSTD_CDict_s ZSTD_CDict; - -/*! ZSTD_createCDict() : - * When compressing multiple messages or blocks using the same dictionary, - * it's recommended to digest the dictionary only once, since it's a costly operation. - * ZSTD_createCDict() will create a state from digesting a dictionary. - * The resulting state can be used for future compression operations with very limited startup cost. - * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. - * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. - * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. - * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, - * in which case the only thing that it transports is the @compressionLevel. - * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, - * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, - int compressionLevel); - -/*! ZSTD_freeCDict() : - * Function frees memory allocated by ZSTD_createCDict(). */ -ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); - -/*! ZSTD_compress_usingCDict() : - * Compression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. - * Note : compression level is _decided at dictionary creation time_, - * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict); - - -typedef struct ZSTD_DDict_s ZSTD_DDict; - -/*! ZSTD_createDDict() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); - -/*! ZSTD_freeDDict() : - * Function frees memory allocated with ZSTD_createDDict() */ -ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); - -/*! ZSTD_decompress_usingDDict() : - * Decompression using a digested Dictionary. - * Recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); - - -/******************************** - * Dictionary helper functions - *******************************/ - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); - - -/******************************************************************************* - * Advanced dictionary and prefix API - * - * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. - ******************************************************************************/ - - -/*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from `dict` buffer. - * Decompression will have to use same dictionary. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, - * compression parameters can no longer be changed after loading a dictionary. - * Note 3 :`dict` content will be copied internally. - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - -/*! ZSTD_CCtx_refCDict() : - * Reference a prepared dictionary, to be used for all next compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. - * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. - * The dictionary will remain valid for future compressed frames using same CCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Referencing a NULL CDict means "return to no-dictionary mode". - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ -ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); - -/*! ZSTD_CCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) for next compressed frame. - * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. - * Its content must remain unmodified during compression. - * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - * ensure that the window size is large enough to contain the entire source. - * See ZSTD_c_windowLog. - * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU consuming operation, with non-negligible impact on latency. - * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). - * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); - -/*! ZSTD_DCtx_loadDictionary() : - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Loading a dictionary involves building tables, - * which has a non-negligible impact on CPU usage and latency. - * It's recommended to "load once, use many times", to amortize the cost - * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. - * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. - * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of - * how dictionary content is loaded and interpreted. - */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); - -/*! ZSTD_DCtx_refDDict() : - * Reference a prepared dictionary, to be used to decompress next frames. - * The dictionary remains active for decompression of future frames using same DCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. - */ -ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - -/*! ZSTD_DCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) to decompress next frame. - * This is the reverse operation of ZSTD_CCtx_refPrefix(), - * and must use the same prefix as the one used during compression. - * Prefix is **only used once**. Reference is discarded at end of frame. - * End of frame is reached when ZSTD_decompressStream() returns 0. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. - * Prefix buffer must remain unmodified up to the end of frame, - * reached when ZSTD_decompressStream() returns 0. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). - * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) - * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. - * A full dictionary is more costly, as it requires building tables. - */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, - const void* prefix, size_t prefixSize); - -/* === Memory management === */ - -/*! ZSTD_sizeof_*() : - * These functions give the _current_ memory usage of selected object. - * Note that object memory usage can evolve (increase or decrease) over time. */ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); - -} -#endif /* ZSTD_H_235446 */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h b/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h deleted file mode 100644 index 653074666..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +++ /dev/null @@ -1,449 +0,0 @@ -/* ****************************************************************** - * bitstream - * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -/* -* This API consists of small unitary functions, which must be inlined for best performance. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. -*/ - -/*-**************************************** -* Dependencies -******************************************/ -#include "zstd/common/mem.h" /* unaligned access routines */ -#include "zstd/common/compiler.h" /* UNLIKELY() */ -#include "zstd/common/debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ -#include "zstd/common/error_private.h" /* error codes and messages */ - - -/*========================================= -* Target specific -=========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#elif defined(__ICCARM__) -# include -#endif - -#define STREAM_ACCUMULATOR_MIN_32 25 -#define STREAM_ACCUMULATOR_MIN_64 57 -#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) - -namespace duckdb_zstd { - -/*-****************************************** -* bitStream encoding API (write forward) -********************************************/ -/* bitStream can mix input from multiple sources. - * A critical property of these streams is that they encode and decode in **reverse** direction. - * So the first bit sequence you add will be the last to be read, like a LIFO stack. - */ -typedef struct { - size_t bitContainer; - unsigned bitPos; - char* startPtr; - char* ptr; - char* endPtr; -} BIT_CStream_t; - -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); - -/* Start with initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. -* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. -* -* bits are first added to a local register. -* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. -* -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. -* -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -*/ - - -/*-******************************************** -* bitStream decoding API (read backward) -**********************************************/ -typedef struct { - size_t bitContainer; - unsigned bitsConsumed; - const char* ptr; - const char* start; - const char* limitPtr; -} BIT_DStream_t; - -typedef enum { BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ - /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ - -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); - - -/* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. -* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). -*/ - - -/*-**************************************** -* unsafe API -******************************************/ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); -/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); -/* unsafe version; does not check buffer overflow */ - -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ - - - -/*-************************************************************** -* Internal functions -****************************************************************/ -MEM_STATIC unsigned BIT_highbit32 (U32 val) -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, - 11, 14, 16, 18, 22, 25, 3, 30, - 8, 12, 20, 28, 15, 17, 24, 7, - 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; -# endif - } -} - -/*===== Local Constants =====*/ -static const unsigned BIT_mask[] = { - 0, 1, 3, 7, 0xF, 0x1F, - 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, - 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, - 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, - 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, - 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ -#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) - -/*-************************************************************** -* bitStream encoding -****************************************************************/ -/*! BIT_initCStream() : - * `dstCapacity` must be > sizeof(size_t) - * @return : 0 if success, - * otherwise an error code (can be tested using ERR_isError()) */ -MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, - void* startPtr, size_t dstCapacity) -{ - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char*)startPtr; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); - if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); - return 0; -} - -/*! BIT_addBits() : - * can add up to 31 bits into `bitC`. - * Note : does not check for register overflow ! */ -MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, - size_t value, unsigned nbBits) -{ - MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); - assert(nbBits < BIT_MASK_SIZE); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_addBitsFast() : - * works only if `value` is _clean_, - * meaning all high bits above nbBits are 0 */ -MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, - size_t value, unsigned nbBits) -{ - assert((value>>nbBits) == 0); - assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_flushBitsFast() : - * assumption : bitContainer has not overflowed - * unsafe version; does not check buffer overflow */ -MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); - assert(bitC->ptr <= bitC->endPtr); - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} - -/*! BIT_flushBits() : - * assumption : bitContainer has not overflowed - * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. - * overflow will be revealed later on using BIT_closeCStream() */ -MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); - assert(bitC->ptr <= bitC->endPtr); - MEM_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes*8; -} - -/*! BIT_closeCStream() : - * @return : size of CStream, in bytes, - * or 0 if it could not fit into dstBuffer */ -MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) -{ - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -} - - -/*-******************************************************** -* bitStream decoding -**********************************************************/ -/*! BIT_initDStream() : - * Initialize a BIT_DStream_t. - * `bitD` : a pointer to an already allocated BIT_DStream_t structure. - * `srcSize` must be the *exact* size of the bitStream, in bytes. - * @return : size of stream (== srcSize), or an errorCode if a problem is detected - */ -MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } - - bitD->start = (const char*)srcBuffer; - bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); - - if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = MEM_readLEST(bitD->ptr); - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } - } else { - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE*)(bitD->start); - switch(srcSize) - { - case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); - /* fall-through */ - - case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); - /* fall-through */ - - case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); - /* fall-through */ - - case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; - /* fall-through */ - - case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; - /* fall-through */ - - case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; - /* fall-through */ - - default: break; - } - { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; - if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; - } - - return srcSize; -} - -MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) -{ - return bitContainer >> start; -} - -MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) -{ - U32 const regMask = sizeof(bitContainer)*8 - 1; - /* if start > regMask, bitstream is corrupted, and result is undefined */ - assert(nbBits < BIT_MASK_SIZE); - return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; -} - -MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) -{ - assert(nbBits < BIT_MASK_SIZE); - return bitContainer & BIT_mask[nbBits]; -} - -/*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) -{ - /* arbitrate between double-shift and shift+mask */ -#if 1 - /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, - * bitstream is likely corrupted, and result is undefined */ - return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); -#else - /* this code path is slower on my os-x laptop */ - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); -#endif -} - -/*! BIT_lookBitsFast() : - * unsafe version; only works if nbBits >= 1 */ -MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) -{ - U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; - assert(nbBits >= 1); - return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); -} - -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) -{ - bitD->bitsConsumed += nbBits; -} - -/*! BIT_readBits() : - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) -{ - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_readBitsFast() : - * unsafe version; only works only if nbBits >= 1 */ -MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) -{ - size_t const value = BIT_lookBitsFast(bitD, nbBits); - assert(nbBits >= 1); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_reloadDStreamFast() : - * Similar to BIT_reloadDStream(), but with two differences: - * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! - * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this - * point you must use BIT_reloadDStream() to reload. - */ -MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) -{ - if (UNLIKELY(bitD->ptr < bitD->limitPtr)) - return BIT_DStream_overflow; - assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = MEM_readLEST(bitD->ptr); - return BIT_DStream_unfinished; -} - -/*! BIT_reloadDStream() : - * Refill `bitD` from buffer previously set in BIT_initDStream() . - * This function is safe, it guarantees it will not read beyond src buffer. - * @return : status of `BIT_DStream_t` internal register. - * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ -MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ - return BIT_DStream_overflow; - - if (bitD->ptr >= bitD->limitPtr) { - return BIT_reloadDStreamFast(bitD); - } - if (bitD->ptr == bitD->start) { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } - /* start < ptr < limitPtr */ - { U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes*8; - bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ - return result; - } -} - -/*! BIT_endOfDStream() : - * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). - */ -MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); -} - -} - -#endif /* BITSTREAM_H_MODULE */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/compiler.h b/src/duckdb/third_party/zstd/include/zstd/common/compiler.h deleted file mode 100644 index b94dbad13..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPILER_H -#define ZSTD_COMPILER_H - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -/* force inlining */ - -#if !defined(ZSTD_NO_INLINE) -#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#else - -#define INLINE_KEYWORD -#define FORCE_INLINE_ATTR - -#endif - -/** - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -/** - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers - * performance. - * - * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the - * always_inline attribute. - * - * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline - * attribute. - */ -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 -# define HINT_INLINE static INLINE_KEYWORD -#else -# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -#endif - -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR -#endif - -/* force no inlining */ -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif - -/* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -#else -# define TARGET_ATTRIBUTE(target) -#endif - -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. - */ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif - -/* prefetch - * can be disabled, by declaring NO_PREFETCH build macro */ -#if defined(NO_PREFETCH) -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -#else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) -# elif defined(__aarch64__) -# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) -# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) -# else -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* NO_PREFETCH */ - -#define CACHELINE_SIZE 64 - -#define PREFETCH_AREA(p, s) { \ - const char* const _ptr = (const char*)(p); \ - size_t const _size = (size_t)(s); \ - size_t _pos; \ - for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ - PREFETCH_L2(_ptr + _pos); \ - } \ -} - -/* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) -# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) -# else -# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") -# endif -#else -# define DONT_VECTORIZE -#endif - -/* Tell the compiler that a branch is likely or unlikely. - * Only use these macros if it causes the compiler to generate better code. - * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc - * and clang, please do. - */ -#if defined(__GNUC__) -#ifndef LIKELY -#define LIKELY(x) (__builtin_expect((x), 1)) -#endif -#ifndef UNLIKELY -#define UNLIKELY(x) (__builtin_expect((x), 0)) -#endif -#else -#ifndef LIKELY -#define LIKELY(x) (x) -#endif -#ifndef UNLIKELY -#define UNLIKELY(x) (x) -#endif -#endif - -#endif /* ZSTD_COMPILER_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/debug.h b/src/duckdb/third_party/zstd/include/zstd/common/debug.h deleted file mode 100644 index ac6224888..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/debug.h +++ /dev/null @@ -1,114 +0,0 @@ -/* ****************************************************************** - * debug - * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - - -/* - * The purpose of this header is to enable debug functions. - * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, - * and DEBUG_STATIC_ASSERT() for compile-time. - * - * By default, DEBUGLEVEL==0, which means run-time debug is disabled. - * - * Level 1 enables assert() only. - * Starting level 2, traces can be generated and pushed to stderr. - * The higher the level, the more verbose the traces. - * - * It's possible to dynamically adjust level using variable g_debug_level, - * which is only declared if DEBUGLEVEL>=2, - * and is a global variable, not multi-thread protected (use with care) - */ - -#ifndef DEBUG_H_12987983217 -#define DEBUG_H_12987983217 - -#if defined (__cplusplus) -extern "C" { -#endif - - -/* static assert is triggered at compile time, leaving no runtime artefact. - * static assert only works with compile-time constants. - * Also, this variant can only be used inside a function. */ -#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) - - -/* DEBUGLEVEL is expected to be defined externally, - * typically through compiler command line. - * Value must be a number. */ -#ifndef DEBUGLEVEL -# define DEBUGLEVEL 0 -#endif - - -/* DEBUGFILE can be defined externally, - * typically through compiler command line. - * note : currently useless. - * Value must be stderr or stdout */ -#ifndef DEBUGFILE -# define DEBUGFILE stderr -#endif - - -/* recommended values for DEBUGLEVEL : - * 0 : release mode, no debug, all run-time checks disabled - * 1 : enables assert() only, no display - * 2 : reserved, for currently active debug path - * 3 : events once per object lifetime (CCtx, CDict, etc.) - * 4 : events once per frame - * 5 : events once per block - * 6 : events once per sequence (verbose) - * 7+: events at every position (*very* verbose) - * - * It's generally inconvenient to output traces > 5. - * In which case, it's possible to selectively trigger high verbosity levels - * by modifying g_debug_level. - */ - -#if (DEBUGLEVEL>=1) -# include -#else -# ifndef assert /* assert may be already defined, due to prior #include */ -# define assert(condition) ((void)0) /* disable assert (default) */ -# endif -#endif - -#if (DEBUGLEVEL>=2) -# include -extern int g_debuglevel; /* the variable is only declared, - it actually lives in debug.c, - and is shared by the whole process. - It's not thread-safe. - It's useful when enabling very verbose levels - on selective conditions (such as position in src) */ - -# define RAWLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __VA_ARGS__); \ - } } -# define DEBUGLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } -#else -# define RAWLOG(l, ...) {} /* disabled */ -# define DEBUGLOG(l, ...) {} /* disabled */ -#endif - - -#if defined (__cplusplus) -} -#endif - -#endif /* DEBUG_H_12987983217 */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/error_private.h b/src/duckdb/third_party/zstd/include/zstd/common/error_private.h deleted file mode 100644 index b1af95d12..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* Note : this module is expected to remain private, do not expose it */ - -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE - -/* **************************************** -* Dependencies -******************************************/ -#include /* size_t */ -#include "zstd/common/zstd_errors.h" /* enum list */ - -namespace duckdb_zstd { -/* **************************************** -* Compiler-specific -******************************************/ -#if defined(__GNUC__) -# define ERR_STATIC static __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define ERR_STATIC static inline -#elif defined(_MSC_VER) -# define ERR_STATIC static __inline -#else -# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - - -/*-**************************************** -* Customization (error_public.h) -******************************************/ -typedef ZSTD_ErrorCode ERR_enum; -#define PREFIX(name) ZSTD_error_##name - - -/*-**************************************** -* Error codes handling -******************************************/ -#undef ERROR /* already defined on Visual Studio */ -#define ERROR(name) ZSTD_ERROR(name) -#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) - -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - -ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } - -/* check and forward error code */ -#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e -#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } - - -/*-**************************************** -* Error Strings -******************************************/ - -const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ - -ERR_STATIC const char* ERR_getErrorName(size_t code) -{ - return ERR_getErrorString(ERR_getErrorCode(code)); -} - -} - -#endif /* ERROR_H_MODULE */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/fse.h b/src/duckdb/third_party/zstd/include/zstd/common/fse.h deleted file mode 100644 index 6600fee54..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/fse.h +++ /dev/null @@ -1,277 +0,0 @@ -/* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -#ifndef FSE_H -#define FSE_H - - -/*-***************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ - - -namespace duckdb_zstd { -/*-***************************************** -* FSE_PUBLIC_API : control library symbols visibility -******************************************/ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define FSE_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define FSE_PUBLIC_API -#endif - -/*------ Version ------*/ -#define FSE_VERSION_MAJOR 0 -#define FSE_VERSION_MINOR 9 -#define FSE_VERSION_RELEASE 0 - -#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE -#define FSE_QUOTE(str) #str -#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) -#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) - -#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) -FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ - - -/*-**************************************** -* FSE simple functions -******************************************/ -/*! FSE_compress() : - Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. - 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). - @return : size of compressed data (<= dstCapacity). - Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. - if FSE_isError(return), compression failed (more details using FSE_getErrorName()) -*/ -FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/*! FSE_decompress(): - Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', - into already allocated destination buffer 'dst', of size 'dstCapacity'. - @return : size of regenerated data (<= maxDstSize), - or an error code, which can be tested using FSE_isError() . - - ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! - Why ? : making this distinction requires a header. - Header management is intentionally delegated to the user layer, which can better manage special cases. -*/ -FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, - const void* cSrc, size_t cSrcSize); - - -/*-***************************************** -* Tool functions -******************************************/ -FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ - -/* Error Management */ -FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ -FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ - - -/*-***************************************** -* FSE advanced functions -******************************************/ -/*! FSE_compress2() : - Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' - Both parameters can be defined as '0' to mean : use default value - @return : size of compressed data - Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! - if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. - if FSE_isError(return), it's an error code. -*/ -FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); - - -/*-***************************************** -* FSE detailed API -******************************************/ -/*! -FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] (see hist.h) -2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) -3. save normalized counters to memory buffer using writeNCount() -4. build encoding table 'CTable' from normalized counters -5. encode the data stream using encoding table 'CTable' - -FSE_decompress() does the following: -1. read normalized counters with readNCount() -2. build decoding table 'DTable' from normalized counters -3. decode the data stream using decoding table 'DTable' - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and provide normalized distribution using external method. -*/ - -/* *** COMPRESSION *** */ - -/*! FSE_optimalTableLog(): - dynamically downsize 'tableLog' when conditions are met. - It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - @return : recommended tableLog (necessarily <= 'maxTableLog') */ -FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_normalizeCount(): - normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) - 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - @return : tableLog, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_NCountWriteBound(): - Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. - Typically useful for allocation purpose. */ -FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - @return : size of the compressed table, - or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, - const short* normalizedCounter, - unsigned maxSymbolValue, unsigned tableLog); - -/*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ -FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); -FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); - -/*! FSE_buildCTable(): - Builds `ct`, which must be already allocated, using FSE_createCTable(). - @return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_compress_usingCTable(): - Compress `src` using `ct` into `dst` which must be already allocated. - @return : size of compressed data (<= `dstCapacity`), - or 0 if compressed data could not fit into `dst`, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); - -/*! -Tutorial : ----------- -The first step is to count all symbols. FSE_count() does this job very fast. -Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -FSE_count() will return the number of occurrence of the most frequent symbol. -This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). - -The next step is to normalize the frequencies. -FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -You can use 'tableLog'==0 to mean "use default tableLog value". -If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). - -The result of FSE_normalizeCount() will be saved into a table, -called 'normalizedCounter', which is a table of signed short. -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -The return value is tableLog if everything proceeded as expected. -It is 0 if there is a single symbol within distribution. -If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). - -'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -'buffer' must be already allocated. -For guaranteed success, buffer size must be at least FSE_headerBound(). -The result of the function is the number of bytes written into 'buffer'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). - -'normalizedCounter' can then be used to create the compression table 'CTable'. -The space required by 'CTable' must be already allocated, using FSE_createCTable(). -You can then use FSE_buildCTable() to fill 'CTable'. -If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). - -'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -If it returns '0', compressed data could not fit into 'dst'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -*/ - - -/* *** DECOMPRESSION *** */ - -/*! FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - @return : size read from 'rBuffer', - or an errorCode, which can be tested using FSE_isError(). - maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, - unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, - const void* rBuffer, size_t rBuffSize); - -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ -FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); -FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); - -/*! FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); - -/*! -Tutorial : ----------- -(Note : these functions only decompress FSE-compressed blocks. - If block is uncompressed, use memcpy() instead - If block is a single repeated byte, use memset() instead ) - -The first step is to obtain the normalized frequencies of symbols. -This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. -In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -or size the table to handle worst case situations (typically 256). -FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -This is performed by the function FSE_buildDTable(). -The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -`cSrcSize` must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) -*/ - -} - -#endif /* FSE_H */ - diff --git a/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h b/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h deleted file mode 100644 index 7d8267e3e..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +++ /dev/null @@ -1,421 +0,0 @@ -/* ****************************************************************** - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -#ifndef FSE_H_FSE_STATIC_LINKING_ONLY -#define FSE_H_FSE_STATIC_LINKING_ONLY - -/* *** Dependency *** */ -#include "zstd/common/bitstream.h" - -namespace duckdb_zstd { - -/* ***************************************** -* Static allocation -*******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) -size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); - -size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ - -size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); -/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. - FSE_endOfDState(&DState); -*/ - - -/* ***************************************** -* FSE unsafe API -*******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - - -/* ***************************************** -* Implementation of inlined functions -*******************************************/ -typedef struct { - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) -{ - const void* ptr = ct; - const U16* u16ptr = (const U16*) ptr; - const U32 tableLog = MEM_read16(ptr); - statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; - statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); - statePtr->stateLog = tableLog; -} - - -/*! FSE_initCState2() : -* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) -* uses the smallest state value possible, saving the cost of this symbol */ -MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) -{ - FSE_initCState(statePtr, ct); - { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* stateTable = (const U16*)(statePtr->stateTable); - U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); - statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } -} - -MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) -{ - FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; - const U16* const stateTable = (const U16*)(statePtr->stateTable); - U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - BIT_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -} - -MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) -{ - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); -} - - -/* FSE_getMaxNbBits() : - * Approximate maximum cost of a symbol, in bits. - * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) -{ - const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; - return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; -} - -/* FSE_bitCost() : - * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) - * note 1 : assume symbolValue is valid (<= maxSymbolValue) - * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ -MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) -{ - const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; - U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; - U32 const threshold = (minNbBits+1) << 16; - assert(tableLog < 16); - assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ - { U32 const tableSize = 1 << tableLog; - U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); - U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ - U32 const bitMultiplier = 1 << accuracyLog; - assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); - assert(normalizedDeltaFromThreshold <= bitMultiplier); - return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; - } -} - - -/* ====== Decompression ====== */ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - -typedef struct -{ - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - -MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) -{ - const void* ptr = dt; - const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; -} - -MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.newState + lowBits; -} - -MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/*! FSE_decodeSymbolFast() : - unsafe, only works if no symbol has a probability > 50% */ -MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) -{ - return DStatePtr->state == 0; -} - - - -#ifndef FSE_COMMONDEFS_ONLY - -/* ************************************************************** -* Tuning parameters -****************************************************************/ -/*!MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#ifndef FSE_MAX_MEMORY_USAGE -# define FSE_MAX_MEMORY_USAGE 14 -#endif -#ifndef FSE_DEFAULT_MEMORY_USAGE -# define FSE_DEFAULT_MEMORY_USAGE 13 -#endif - -/*!FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#ifndef FSE_MAX_SYMBOL_VALUE -# define FSE_MAX_SYMBOL_VALUE 255 -#endif - -/* ************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION -#define FSE_DECODE_TYPE FSE_decode_t - - -#endif /* !FSE_COMMONDEFS_ONLY */ - - -/* *************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) -#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX -# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif - -#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) - -} - -#endif /* FSE_H_FSE_STATIC_LINKING_ONLY */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/huf.h b/src/duckdb/third_party/zstd/include/zstd/common/huf.h deleted file mode 100644 index 4bba72729..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/huf.h +++ /dev/null @@ -1,97 +0,0 @@ -/* ****************************************************************** - * huff0 huffman codec, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -#ifndef HUF_H_298734234 -#define HUF_H_298734234 - -/* *** Dependencies *** */ -#include /* size_t */ - - -/* *** library symbols visibility *** */ -/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, - * HUF symbols remain "private" (internal symbols for library only). - * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ -#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) -# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) -#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ -# define HUF_PUBLIC_API __declspec(dllexport) -#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) -# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ -#else -# define HUF_PUBLIC_API -#endif - -namespace duckdb_zstd { - -/* ========================== */ -/* *** simple functions *** */ -/* ========================== */ - -/** HUF_compress() : - * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. - * 'dst' buffer must be already allocated. - * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). - * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. - * @return : size of compressed data (<= `dstCapacity`). - * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! - * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) - */ -HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - -/** HUF_decompress() : - * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', - * into already allocated buffer 'dst', of minimum size 'dstSize'. - * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. - * Note : in contrast with FSE, HUF_decompress can regenerate - * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, - * because it knows size to regenerate (originalSize). - * @return : size of regenerated data (== originalSize), - * or an error code, which can be tested using HUF_isError() - */ -HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, - const void* cSrc, size_t cSrcSize); - - -/* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ - -/* Error Management */ -HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ -HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ - - -/* *** Advanced function *** */ - -/** HUF_compress2() : - * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. - * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . - * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ -HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog); - -/** HUF_compress4X_wksp() : - * Same as HUF_compress2(), but uses externally allocated `workSpace`. - * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) -#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) -HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize); - -#endif /* HUF_H_298734234 */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h b/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h deleted file mode 100644 index 7701972e6..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +++ /dev/null @@ -1,238 +0,0 @@ -/* ****************************************************************** - * WARNING !! - * The following section contains advanced and experimental definitions - * which shall never be used in the context of a dynamic library, - * because they are not guaranteed to remain stable in the future. - * Only consider them in association with static linking. - * **************************************************************** */ -#ifndef HUF_H_HUF_STATIC_LINKING_ONLY -#define HUF_H_HUF_STATIC_LINKING_ONLY - -/* *** Dependencies *** */ -#include "zstd/common/mem.h" /* U32 */ - - -/* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ -#define HUF_SYMBOLVALUE_MAX 255 - -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) -# error "HUF_TABLELOG_MAX is too large !" -#endif - - -/* **************************************** -* Static allocation -******************************************/ -/* HUF buffer bounds */ -#define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* static allocation of HUF's Compression Table */ -#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ -#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) -#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ - void* name##hv = &(name##hb); \ - HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ - -/* static allocation of HUF's DTable */ -typedef U32 HUF_DTable; -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ - HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } - - -/* **************************************** -* Advanced decompression functions -******************************************/ -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -#endif - -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif - - -/* **************************************** - * HUF detailed API - * ****************************************/ - -/*! HUF_compress() does the following: - * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "zstd/common/fse.h") - * 2. (optional) refine tableLog using HUF_optimalTableLog() - * 3. build Huffman table from count using HUF_buildCTable() - * 4. save Huffman table to memory buffer using HUF_writeCTable() - * 5. encode the data stream using HUF_compress4X_usingCTable() - * - * The following API allows targeting specific sub-functions for advanced tasks. - * For example, it's possible to compress several blocks using the same 'CTable', - * or to save and regenerate 'CTable' using external methods. - */ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ -size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); -size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); -int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); - -typedef enum { - HUF_repeat_none, /**< Cannot use the previous table */ - HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ - } HUF_repeat; -/** HUF_compress4X_repeat() : - * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. - * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress4X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. - */ -#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) -#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) -size_t HUF_buildCTable_wksp (HUF_CElt* tree, - const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, - void* workSpace, size_t wkspSize); - -/*! HUF_readStats() : - * Read compact Huffman tree, saved by HUF_writeCTable(). - * `huffWeight` is destination buffer. - * @return : size read from `src` , or an error Code . - * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, - U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, - const void* src, size_t srcSize); - -/** HUF_readCTable() : - * Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); - -/** HUF_getNbBits() : - * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX - * Note 1 : is not inlined, as HUF_CElt definition is private - * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ -U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); - -/* - * HUF_decompress() does the following: - * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics - * 2. build Huffman table from save, using HUF_readDTableX?() - * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() - */ - -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); - -/** - * The minimum workspace size for the `workSpace` used in - * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). - * - * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when - * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. - * Buffer overflow errors may potentially occur if code modifications result in - * a required workspace size greater than that specified in the following - * macro. - */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) -#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); -size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); -#endif - -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif - - -/* ====================== */ -/* single stream variants */ -/* ====================== */ - -size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); -size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ -size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); -/** HUF_compress1X_repeat() : - * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. - * If it uses hufTable it does not modify hufTable or repeat. - * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. - * If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress1X_repeat(void* dst, size_t dstSize, - const void* src, size_t srcSize, - unsigned maxSymbolValue, unsigned tableLog, - void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ - HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); - -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ -#endif - -size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); -size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ -#endif - -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif -#ifndef HUF_FORCE_DECOMPRESS_X1 -size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); -#endif - -/* BMI2 variants. - * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. - */ -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); -#endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); - -} - -#endif /* HUF_STATIC_LINKING_ONLY */ - diff --git a/src/duckdb/third_party/zstd/include/zstd/common/mem.h b/src/duckdb/third_party/zstd/include/zstd/common/mem.h deleted file mode 100644 index 7c7b1f32d..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/mem.h +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef MEM_H_MODULE -#define MEM_H_MODULE - -/*-**************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ - - -/*-**************************************** -* Compiler specifics -******************************************/ -#if defined(_MSC_VER) /* Visual Studio */ -# include /* _byteswap_ulong */ -# include /* _byteswap_* */ -#endif -#if defined(__GNUC__) -# define MEM_STATIC static __inline __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline -#elif defined(_MSC_VER) -# define MEM_STATIC static __inline -#else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ -#endif - -#ifndef __has_builtin -# define __has_builtin(x) 0 /* compat. with non-clang compilers */ -#endif - -/* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } -MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } - -/* detects whether we are being compiled under msan */ -#if defined (__has_feature) -# if __has_feature(memory_sanitizer) -# define MEMORY_SANITIZER 1 -# endif -#endif - -/*-************************************************************** -* Basic Types -*****************************************************************/ -#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; -#else -# include -#if CHAR_BIT != 8 -# error "this implementation requires char to be exactly 8-bit type" -#endif - typedef unsigned char BYTE; -#if USHRT_MAX != 65535 -# error "this implementation requires short to be exactly 16-bit type" -#endif - typedef unsigned short U16; - typedef signed short S16; -#if UINT_MAX != 4294967295 -# error "this implementation requires int to be exactly 32-bit type" -#endif - typedef unsigned int U32; - typedef signed int S32; -/* note : there are no limits defined for long long type in C90. - * limits exist in C99, however, in such case, is preferred */ - typedef unsigned long long U64; - typedef signed long long S64; -#endif - -namespace duckdb_zstd { - -/*-************************************************************** -* Memory I/O -*****************************************************************/ -/* MEM_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets depending on alignment. - * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) -# define MEM_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } -MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } - -MEM_STATIC unsigned MEM_isLittleEndian(void) -{ - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; -} - -#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) - -/* violates C standard, by lying on structure alignment. -Only use if no other choice to achieve best performance on target platform */ -MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } -MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } -MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } -MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } - -#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) - __pragma( pack(push, 1) ) - typedef struct { U16 v; } unalign16; - typedef struct { U32 v; } unalign32; - typedef struct { U64 v; } unalign64; - typedef struct { size_t v; } unalignArch; - __pragma( pack(pop) ) -#else - typedef struct { U16 v; } __attribute__((packed)) unalign16; - typedef struct { U32 v; } __attribute__((packed)) unalign32; - typedef struct { U64 v; } __attribute__((packed)) unalign64; - typedef struct { size_t v; } __attribute__((packed)) unalignArch; -#endif - -MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } -MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } -MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } -MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } -MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } -MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } - -#else - -/* default method, safe and standard. - can sometimes prove slower */ - -MEM_STATIC U16 MEM_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U32 MEM_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC U64 MEM_read64(const void* memPtr) -{ - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC size_t MEM_readST(const void* memPtr) -{ - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -MEM_STATIC void MEM_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write32(void* memPtr, U32 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -MEM_STATIC void MEM_write64(void* memPtr, U64 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif /* MEM_FORCE_MEMORY_ACCESS */ - -MEM_STATIC U32 MEM_swap32(U32 in) -{ -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_ulong(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap32)) - return __builtin_bswap32(in); -#else - return ((in << 24) & 0xff000000 ) | - ((in << 8) & 0x00ff0000 ) | - ((in >> 8) & 0x0000ff00 ) | - ((in >> 24) & 0x000000ff ); -#endif -} - -MEM_STATIC U64 MEM_swap64(U64 in) -{ -#if defined(_MSC_VER) /* Visual Studio */ - return _byteswap_uint64(in); -#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ - || (defined(__clang__) && __has_builtin(__builtin_bswap64)) - return __builtin_bswap64(in); -#else - return ((in << 56) & 0xff00000000000000ULL) | - ((in << 40) & 0x00ff000000000000ULL) | - ((in << 24) & 0x0000ff0000000000ULL) | - ((in << 8) & 0x000000ff00000000ULL) | - ((in >> 8) & 0x00000000ff000000ULL) | - ((in >> 24) & 0x0000000000ff0000ULL) | - ((in >> 40) & 0x000000000000ff00ULL) | - ((in >> 56) & 0x00000000000000ffULL); -#endif -} - -MEM_STATIC size_t MEM_swapST(size_t in) -{ - if (MEM_32bits()) - return (size_t)MEM_swap32((U32)in); - else - return (size_t)MEM_swap64((U64)in); -} - -/*=== Little endian r/w ===*/ - -MEM_STATIC U16 MEM_readLE16(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read16(memPtr); - else { - const BYTE* p = (const BYTE*)memPtr; - return (U16)(p[0] + (p[1]<<8)); - } -} - -MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) -{ - if (MEM_isLittleEndian()) { - MEM_write16(memPtr, val); - } else { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE)val; - p[1] = (BYTE)(val>>8); - } -} - -MEM_STATIC U32 MEM_readLE24(const void* memPtr) -{ - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); -} - -MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) -{ - MEM_writeLE16(memPtr, (U16)val); - ((BYTE*)memPtr)[2] = (BYTE)(val>>16); -} - -MEM_STATIC U32 MEM_readLE32(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read32(memPtr); - else - return MEM_swap32(MEM_read32(memPtr)); -} - -MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) -{ - if (MEM_isLittleEndian()) - MEM_write32(memPtr, val32); - else - MEM_write32(memPtr, MEM_swap32(val32)); -} - -MEM_STATIC U64 MEM_readLE64(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_read64(memPtr); - else - return MEM_swap64(MEM_read64(memPtr)); -} - -MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) -{ - if (MEM_isLittleEndian()) - MEM_write64(memPtr, val64); - else - MEM_write64(memPtr, MEM_swap64(val64)); -} - -MEM_STATIC size_t MEM_readLEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readLE32(memPtr); - else - return (size_t)MEM_readLE64(memPtr); -} - -MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeLE32(memPtr, (U32)val); - else - MEM_writeLE64(memPtr, (U64)val); -} - -/*=== Big endian r/w ===*/ - -MEM_STATIC U32 MEM_readBE32(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_swap32(MEM_read32(memPtr)); - else - return MEM_read32(memPtr); -} - -MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) -{ - if (MEM_isLittleEndian()) - MEM_write32(memPtr, MEM_swap32(val32)); - else - MEM_write32(memPtr, val32); -} - -MEM_STATIC U64 MEM_readBE64(const void* memPtr) -{ - if (MEM_isLittleEndian()) - return MEM_swap64(MEM_read64(memPtr)); - else - return MEM_read64(memPtr); -} - -MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) -{ - if (MEM_isLittleEndian()) - MEM_write64(memPtr, MEM_swap64(val64)); - else - MEM_write64(memPtr, val64); -} - -MEM_STATIC size_t MEM_readBEST(const void* memPtr) -{ - if (MEM_32bits()) - return (size_t)MEM_readBE32(memPtr); - else - return (size_t)MEM_readBE64(memPtr); -} - -MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) -{ - if (MEM_32bits()) - MEM_writeBE32(memPtr, (U32)val); - else - MEM_writeBE64(memPtr, (U64)val); -} - -} - -#endif /* MEM_H_MODULE */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/xxhash.h b/src/duckdb/third_party/zstd/include/zstd/common/xxhash.h deleted file mode 100644 index a6b7990c8..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/xxhash.h +++ /dev/null @@ -1,235 +0,0 @@ -/* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. - -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. - -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. -Name Speed on 64 bits Speed on 32 bits -XXH64 13.8 GB/s 1.9 GB/s -XXH32 6.8 GB/s 6.0 GB/s -*/ - -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - - -/* **************************** -* Definitions -******************************/ -#include /* size_t */ -namespace duckdb_zstd { -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; - - -/* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful if you want to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "zstd/common/xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module anymore. -*/ -#ifdef XXH_PRIVATE_API -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ -# endif -#else -# define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ - -/*!XXH_NAMESPACE, aka Namespace Emulation : - -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with another library which also includes xxHash, - -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). - -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. -*/ -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) -#endif - - -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 2 -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) -XXH_PUBLIC_API unsigned XXH_versionNumber (void); - - -/* **************************** -* Simple Hash Functions -******************************/ -typedef unsigned int XXH32_hash_t; -typedef unsigned long long XXH64_hash_t; - -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); - -/*! -XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). -*/ - - -/* **************************** -* Streaming Hash Functions -******************************/ -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ - -/*! State allocation, compatible with dynamic libraries */ - -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); - - -/* hash streaming */ - -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); - -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); - -/* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . - -XXH state must first be allocated, using XXH*_createState() . - -Start a new hash by initializing state with a seed, using XXH*_reset(). - -Then, feed the hash state by calling XXH*_update() as many times as necessary. -Obviously, input must be allocated and read accessible. -The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. - -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. - -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). - -When done, free XXH state space if it was allocated dynamically. -*/ - - -/* ************************** -* Utils -****************************/ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ -# define __restrict /* disable restrict */ -#endif - -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* __restrict dst_state, const XXH32_state_t* __restrict src_state); -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* __restrict dst_state, const XXH64_state_t* __restrict src_state); - - -/* ************************** -* Canonical representation -****************************/ -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ -typedef struct { unsigned char digest[4]; } XXH32_canonical_t; -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; - -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); - -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); - -} - -#endif /* XXHASH_H_5627135585666179 */ - diff --git a/src/duckdb/third_party/zstd/include/zstd/common/xxhash_static.h b/src/duckdb/third_party/zstd/include/zstd/common/xxhash_static.h deleted file mode 100644 index b8a8f2a72..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/xxhash_static.h +++ /dev/null @@ -1,45 +0,0 @@ - -/* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! -=================================================================================================== */ -#ifndef XXH_STATIC_H_3543687687345 -#define XXH_STATIC_H_3543687687345 - -namespace duckdb_zstd { - -/* These definitions are only meant to allow allocation of XXH state - statically, on stack, or in a struct for example. - Do not use members directly. */ - - struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ - unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH32_state_t */ - - struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ - unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH64_state_t */ - -} -// # ifdef XXH_PRIVATE_API -// # include "xxhash.cpp" /* include xxhash functions as `static`, for inlining */ -// # endif - -#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/zstd_errors.h b/src/duckdb/third_party/zstd/include/zstd/common/zstd_errors.h deleted file mode 100644 index a719982bc..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/zstd_errors.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_ERRORS_H_398273423 -#define ZSTD_ERRORS_H_398273423 - -/*===== dependency =====*/ -#include /* size_t */ - - -/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDERRORLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDERRORLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY -#endif -namespace duckdb_zstd { -/*-********************************************* - * Error codes list - *-********************************************* - * Error codes _values_ are pinned down since v1.3.1 only. - * Therefore, don't rely on values if you may link to any version < v1.3.1. - * - * Only values < 100 are considered stable. - * - * note 1 : this API shall be used with static linking only. - * dynamic linking is not yet officially supported. - * note 2 : Prefer relying on the enum than on its value whenever possible - * This is the only supported way to use the error list < v1.3.1 - * note 3 : ZSTD_isError() is always correct, whatever the library version. - **********************************************/ -typedef enum { - ZSTD_error_no_error = 0, - ZSTD_error_GENERIC = 1, - ZSTD_error_prefix_unknown = 10, - ZSTD_error_version_unsupported = 12, - ZSTD_error_frameParameter_unsupported = 14, - ZSTD_error_frameParameter_windowTooLarge = 16, - ZSTD_error_corruption_detected = 20, - ZSTD_error_checksum_wrong = 22, - ZSTD_error_dictionary_corrupted = 30, - ZSTD_error_dictionary_wrong = 32, - ZSTD_error_dictionaryCreation_failed = 34, - ZSTD_error_parameter_unsupported = 40, - ZSTD_error_parameter_outOfBound = 42, - ZSTD_error_tableLog_tooLarge = 44, - ZSTD_error_maxSymbolValue_tooLarge = 46, - ZSTD_error_maxSymbolValue_tooSmall = 48, - ZSTD_error_stage_wrong = 60, - ZSTD_error_init_missing = 62, - ZSTD_error_memory_allocation = 64, - ZSTD_error_workSpace_tooSmall= 66, - ZSTD_error_dstSize_tooSmall = 70, - ZSTD_error_srcSize_wrong = 72, - ZSTD_error_dstBuffer_null = 74, - /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ - ZSTD_error_frameIndex_tooLarge = 100, - ZSTD_error_seekableIO = 102, - ZSTD_error_dstBuffer_wrong = 104, - ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ -} ZSTD_ErrorCode; - -/*! ZSTD_getErrorCode() : - convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, - which can be used to compare with enum list published above */ -ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); -ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ - -} - -#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h b/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h deleted file mode 100644 index 93cb45856..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE - -/* this module contains definitions which must be identical - * across compression, decompression and dictBuilder. - * It also contains a few functions useful to at least 2 of them - * and which benefit from being inlined */ - -/*-************************************* -* Dependencies -***************************************/ -#ifdef __aarch64__ -#include -#endif -#include "zstd/common/compiler.h" -#include "zstd/common/mem.h" -#include "zstd/common/debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ -#include "zstd/common/error_private.h" -#include "zstd.h" -#include "zstd_static.h" -#include "zstd/common/fse.h" -#include "zstd/common/fse_static.h" -#include "zstd/common/huf.h" -#include "zstd/common/huf_static.h" -#include "zstd/common/xxhash.h" /* XXH_reset, update, digest */ -#include "zstd/common/xxhash_static.h" /* XXH_reset, update, digest */ - -namespace duckdb_zstd { - -/* ---- static assert (debug) --- */ -#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) -#define ZSTD_isError ERR_isError /* for inlining */ -// #define FSE_isError ERR_isError -// #define HUF_isError ERR_isError - - -/*-************************************* -* shared macros -***************************************/ -#undef MIN -#undef MAX -#define MIN(a,b) ((a)<(b) ? (a) : (b)) -#define MAX(a,b) ((a)>(b) ? (a) : (b)) - -/** - * Ignore: this is an internal helper. - * - * This is a helper function to help force C99-correctness during compilation. - * Under strict compilation modes, variadic macro arguments can't be empty. - * However, variadic function arguments can be. Using a function therefore lets - * us statically check that at least one (string) argument was passed, - * independent of the compilation flags. - */ -static INLINE_KEYWORD UNUSED_ATTR -void _force_has_format_string(const char *format, ...) { - (void)format; -} - -/** - * Ignore: this is an internal helper. - * - * We want to force this function invocation to be syntactically correct, but - * we don't want to force runtime evaluation of its arguments. - */ -#define _FORCE_HAS_FORMAT_STRING(...) \ - if (0) { \ - _force_has_format_string(__VA_ARGS__); \ - } - -/** - * Return the specified error if the condition evaluates to true. - * - * In debug modes, prints additional information. - * In order to do that (particularly, printing the conditional that failed), - * this can't just wrap RETURN_ERROR(). - */ -#define RETURN_ERROR_IF(cond, err, ...) \ - if (cond) { \ - RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } - -/** - * Unconditionally return the specified error. - * - * In debug modes, prints additional information. - */ -#define RETURN_ERROR(err, ...) \ - do { \ - RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return ERROR(err); \ - } while(0); - -/** - * If the provided expression evaluates to an error code, returns that error code. - * - * In debug modes, prints additional information. - */ -#define FORWARD_IF_ERROR(err, ...) \ - do { \ - size_t const err_code = (err); \ - if (ERR_isError(err_code)) { \ - RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ - __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ - _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ - RAWLOG(3, ": " __VA_ARGS__); \ - RAWLOG(3, "\n"); \ - return err_code; \ - } \ - } while(0); - - -/*-************************************* -* Common constants -***************************************/ -#define ZSTD_OPT_NUM (1<<12) - -#define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) - -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 - -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 - -#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ - -#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ - -typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; - -#define ZSTD_FRAMECHECKSUMSIZE 4 - -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ - -#define HufLog 12 -typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; - -#define LONGNBSEQ 0x7F00 - -#define MINMATCH 3 - -#define Litbits 8 -#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); - - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { - /* Handle short offset copies. */ - do { - COPY8(op, ip) - } while (op < oend); - } else { - assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first COPY16() call because the copy length is - * almost certain to be short, so the branches have different - * probabilities. Since it is almost certain to be short, only do - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. - */ -#ifndef __aarch64__ - do { - COPY16(op, ip); - } - while (op < oend); -#else - COPY16(op, ip); - if (op >= oend) return; - do { - COPY16(op, ip); - COPY16(op, ip); - } - while (op < oend); -#endif - } -} - -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - if (length > 0) { - memcpy(dst, src, length); - } - return length; -} - -/* define "workspace is too large" as this number of times larger than needed */ -#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 - -/* when workspace is continuously too large - * during at least this number of times, - * context's memory usage is considered wasteful, - * because it's sized to handle a worst case scenario which rarely happens. - * In which case, resize it down to free some memory */ -#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 - - -/*-******************************************* -* Private declarations -*********************************************/ -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; - -typedef struct { - seqDef* sequencesStart; - seqDef* sequences; - BYTE* litStart; - BYTE* lit; - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; -} seqStore_t; - -typedef struct { - U32 litLength; - U32 matchLength; -} ZSTD_sequenceLength; - -/** - * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. - */ -MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) -{ - ZSTD_sequenceLength seqLen; - seqLen.litLength = seq->litLength; - seqLen.matchLength = seq->matchLength + MINMATCH; - if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthID == 1) { - seqLen.litLength += 0xFFFF; - } - if (seqStore->longLengthID == 2) { - seqLen.matchLength += 0xFFFF; - } - } - return seqLen; -} - -/** - * Contains the compressed frame size and an upper-bound for the decompressed frame size. - * Note: before using `compressedSize`, check for errors using ZSTD_isError(). - * similarly, before using `decompressedBound`, check for errors using: - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ -typedef struct { - size_t compressedSize; - unsigned long long decompressedBound; -} ZSTD_frameSizeInfo; /* decompress & legacy */ - -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ - -/* custom memory allocation functions */ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void* ptr, ZSTD_customMem customMem); - - -MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif - } -} - - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ - - -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; /* declared here for decompress and fullbench */ - -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); - -/*! ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); - -} - -#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/hist.h b/src/duckdb/third_party/zstd/include/zstd/compress/hist.h deleted file mode 100644 index 41bbbbeab..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +++ /dev/null @@ -1,78 +0,0 @@ -/* ****************************************************************** - * hist : Histogram functions - * part of Finite State Entropy project - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - Public forum : https://groups.google.com/forum/#!forum/lz4c - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. -****************************************************************** */ - -/* --- dependencies --- */ -#include /* size_t */ - - -namespace duckdb_zstd { -/* --- simple histogram functions --- */ - -/*! HIST_count(): - * Provides the precise count of each byte within a table 'count'. - * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). - * Updates *maxSymbolValuePtr with actual largest symbol value detected. - * @return : count of the most frequent symbol (which isn't identified). - * or an error code, which can be tested using HIST_isError(). - * note : if return == srcSize, there is only one symbol. - */ -size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); - -unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ - - -/* --- advanced histogram functions --- */ - -#define HIST_WKSP_SIZE_U32 1024 -#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) -/** HIST_count_wksp() : - * Same as HIST_count(), but using an externally provided scratch buffer. - * Benefit is this function will use very little stack space. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize, - void* workSpace, size_t workSpaceSize); - -/** HIST_countFast() : - * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` - */ -size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); - -/** HIST_countFast_wksp() : - * Same as HIST_countFast(), but using an externally provided scratch buffer. - * `workSpace` is a writable buffer which must be 4-bytes aligned, - * `workSpaceSize` must be >= HIST_WKSP_SIZE - */ -size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize, - void* workSpace, size_t workSpaceSize); - -/*! HIST_count_simple() : - * Same as HIST_countFast(), this function is unsafe, - * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. - * It is also a bit slower for large inputs. - * However, it does not need any additional memory (not even on stack). - * @return : count of the most frequent symbol. - * Note this function doesn't produce any error (i.e. it must succeed). - */ -unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, - const void* src, size_t srcSize); - -} diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h deleted file mode 100644 index 5e8c6e099..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +++ /dev/null @@ -1,1118 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -/* This header contains definitions - * that shall **only** be used by modules within lib/compress. - */ - -#ifndef ZSTD_COMPRESS_H -#define ZSTD_COMPRESS_H - -/*-************************************* -* Dependencies -***************************************/ -#include "zstd/common/zstd_internal.h" -#include "zstd/compress/zstd_cwksp.h" -// #ifdef ZSTD_MULTITHREAD -// # include "zstdmt_compress.h" -// #endif - -/*-************************************* -* Constants -***************************************/ -#define kSearchStrength 8 -#define HASH_READ_SIZE 8 -#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". - It could be confused for a real successor at index "1", if sorted as larger than its predecessor. - It's not a big deal though : candidate will just be sorted again. - Additionally, candidate position 1 will be lost. - But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. - This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ - - -namespace duckdb_zstd { -/*-************************************* -* Context memory management -***************************************/ -typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; -typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; - -typedef struct ZSTD_prefixDict_s { - const void* dict; - size_t dictSize; - ZSTD_dictContentType_e dictContentType; -} ZSTD_prefixDict; - -typedef struct { - void* dictBuffer; - void const* dict; - size_t dictSize; - ZSTD_dictContentType_e dictContentType; - ZSTD_CDict* cdict; -} ZSTD_localDict; - -typedef struct { - U32 CTable[HUF_CTABLE_SIZE_U32(255)]; - HUF_repeat repeatMode; -} ZSTD_hufCTables_t; - -typedef struct { - FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; - FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; - FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; - FSE_repeat offcode_repeatMode; - FSE_repeat matchlength_repeatMode; - FSE_repeat litlength_repeatMode; -} ZSTD_fseCTables_t; - -typedef struct { - ZSTD_hufCTables_t huf; - ZSTD_fseCTables_t fse; -} ZSTD_entropyCTables_t; - -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - int price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_optimal_t; - -typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; - -typedef struct { - /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ - unsigned* litFreq; /* table of literals statistics, of size 256 */ - unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ - unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ - unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ - ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ - ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ - - U32 litSum; /* nb of literals */ - U32 litLengthSum; /* nb of litLength codes */ - U32 matchLengthSum; /* nb of matchLength codes */ - U32 offCodeSum; /* nb of offset codes */ - U32 litSumBasePrice; /* to compare to log2(litfreq) */ - U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ - U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ - U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ - ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ - const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ - ZSTD_literalCompressionMode_e literalCompressionMode; -} optState_t; - -typedef struct { - ZSTD_entropyCTables_t entropy; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_compressedBlockState_t; - -typedef struct { - BYTE const* nextSrc; /* next block here to continue on current prefix */ - BYTE const* base; /* All regular indexes relative to this position */ - BYTE const* dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more valid data */ -} ZSTD_window_t; - -typedef struct ZSTD_matchState_t ZSTD_matchState_t; -struct ZSTD_matchState_t { - ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary, within context's referential. - * When loadedDictEnd != 0, a dictionary is in use, and still valid. - * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. - * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). - * When dict referential is copied into active context (i.e. not attached), - * loadedDictEnd == dictSize, since referential starts from zero. - */ - U32 nextToUpdate; /* index from which to continue table update */ - U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ - U32* hashTable; - U32* hashTable3; - U32* chainTable; - optState_t opt; /* optimal parser state */ - const ZSTD_matchState_t* dictMatchState; - ZSTD_compressionParameters cParams; -}; - -typedef struct { - ZSTD_compressedBlockState_t* prevCBlock; - ZSTD_compressedBlockState_t* nextCBlock; - ZSTD_matchState_t matchState; -} ZSTD_blockState_t; - -typedef struct { - U32 offset; - U32 checksum; -} ldmEntry_t; - -typedef struct { - ZSTD_window_t window; /* State for the window round buffer management */ - ldmEntry_t* hashTable; - U32 loadedDictEnd; - BYTE* bucketOffsets; /* Next position in bucket to insert entry */ - U64 hashPower; /* Used to compute the rolling hash. - * Depends on ldmParams.minMatchLength */ -} ldmState_t; - -typedef struct { - U32 enableLdm; /* 1 if enable long distance matching */ - U32 hashLog; /* Log size of hashTable */ - U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ - U32 minMatchLength; /* Minimum match length */ - U32 hashRateLog; /* Log number of entries to skip */ - U32 windowLog; /* Window log for the LDM */ -} ldmParams_t; - -typedef struct { - U32 offset; - U32 litLength; - U32 matchLength; -} rawSeq; - -typedef struct { - rawSeq* seq; /* The start of the sequences */ - size_t pos; /* The position where reading stopped. <= size. */ - size_t size; /* The number of sequences. <= capacity. */ - size_t capacity; /* The capacity starting from `seq` pointer */ -} rawSeqStore_t; - -typedef struct { - int collectSequences; - ZSTD_Sequence* seqStart; - size_t seqIndex; - size_t maxSequences; -} SeqCollector; - -struct ZSTD_CCtx_params_s { - ZSTD_format_e format; - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; - - int compressionLevel; - int forceWindow; /* force back-references to respect limit of - * 1< 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; -} - -/* ZSTD_MLcode() : - * note : mlBase = matchLength - MINMATCH; - * because it's the format it's stored in seqStore->sequences */ -MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) -{ - static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, - 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, - 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; - static const U32 ML_deltaCode = 36; - return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; -} - -typedef struct repcodes_s { - U32 rep[3]; -} repcodes_t; - -MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) -{ - repcodes_t newReps; - if (offset >= ZSTD_REP_NUM) { /* full offset */ - newReps.rep[2] = rep[1]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = offset - ZSTD_REP_MOVE; - } else { /* repcode */ - U32 const repCode = offset + ll0; - if (repCode > 0) { /* note : if repCode==0, no change */ - U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; - newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; - newReps.rep[1] = rep[0]; - newReps.rep[0] = currentOffset; - } else { /* repCode == 0 */ - memcpy(&newReps, rep, sizeof(newReps)); - } - } - return newReps; -} - -/* ZSTD_cParam_withinBounds: - * @return 1 if value is within cParam bounds, - * 0 otherwise */ -MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} - -/* ZSTD_noCompressBlock() : - * Writes uncompressed block to dst buffer from given src. - * Returns the size of the block */ -MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) -{ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(srcSize + ZSTDInternalConstants::ZSTD_blockHeaderSize > dstCapacity, - dstSize_tooSmall, "dst buf too small for uncompressed block"); - MEM_writeLE24(dst, cBlockHeader24); - memcpy((BYTE*)dst + ZSTDInternalConstants::ZSTD_blockHeaderSize, src, srcSize); - return ZSTDInternalConstants::ZSTD_blockHeaderSize + srcSize; -} - -MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) -{ - BYTE* const op = (BYTE*)dst; - U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); - RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); - MEM_writeLE24(op, cBlockHeader); - op[3] = src; - return 4; -} - - -/* ZSTD_minGain() : - * minimum compression required - * to generate a compress block or a compressed literals section. - * note : use same formula for both situations */ -MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) -{ - U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - return (srcSize >> minlog) + 2; -} - -MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) -{ - switch (cctxParams->literalCompressionMode) { - case ZSTD_lcm_huffman: - return 0; - case ZSTD_lcm_uncompressed: - return 1; - default: - assert(0 /* impossible: pre-validated */); - /* fall-through */ - case ZSTD_lcm_auto: - return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); - } -} - -/*! ZSTD_safecopyLiterals() : - * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. - * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single - * large copies. - */ -static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { - assert(iend > ilimit_w); - if (ip <= ilimit_w) { - ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); - op += ilimit_w - ip; - ip = ilimit_w; - } - while (ip < iend) *op++ = *ip++; -} - -/*! ZSTD_storeSeq() : - * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. - * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). - * `mlBase` : matchLength - MINMATCH - * Allowed to overread literals up to litLimit. -*/ -HINT_INLINE UNUSED_ATTR -void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) -{ - BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; - BYTE const* const litEnd = literals + litLength; -#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) - static const BYTE* g_start = NULL; - if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ - { U32 const pos = (U32)((const BYTE*)literals - g_start); - DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", - pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); - } -#endif - assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); - /* copy Literals */ - assert(seqStorePtr->maxNbLit <= 128 KB); - assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - assert(literals + litLength <= litLimit); - if (litEnd <= litLimit_w) { - /* Common case we can use wildcopy. - * First copy 16 bytes, because literals are likely short. - */ - assert(WILDCOPY_OVERLENGTH >= 16); - ZSTD_copy16(seqStorePtr->lit, literals); - if (litLength > 16) { - ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); - } - } else { - ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); - } - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 1; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ - seqStorePtr->sequences[0].offset = offCode + 1; - - /* match Length */ - if (mlBase>0xFFFF) { - assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].matchLength = (U16)mlBase; - - seqStorePtr->sequences++; -} - - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes (size_t val) -{ - if (MEM_isLittleEndian()) { - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - return _BitScanForward64( &r, (U64)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r=0; - return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } else { /* Big Endian CPU */ - if (MEM_64bits()) { -# if defined(_MSC_VER) && defined(_WIN64) - unsigned long r = 0; - return _BitScanReverse64( &r, val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 4) - return (__builtin_clzll(val) >> 3); -# else - unsigned r; - const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ - if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } else { /* 32 bits */ -# if defined(_MSC_VER) - unsigned long r = 0; - return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } } -} - - -MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) -{ - const BYTE* const pStart = pIn; - const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); - - if (pIn < pInLoopLimit) { - { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (diff) return ZSTD_NbCommonBytes(diff); } - pIn+=sizeof(size_t); pMatch+=sizeof(size_t); - while (pIn < pInLoopLimit) { - size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); - if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } } - if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn> (32-h) ; } -MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ - -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } -static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } -static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } -static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } -static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } - -MEM_STATIC size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) -{ - switch(mls) - { - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); - } -} - -/** ZSTD_ipow() : - * Return base^exponent. - */ -static U64 ZSTD_ipow(U64 base, U64 exponent) -{ - U64 power = 1; - while (exponent) { - if (exponent & 1) power *= base; - exponent >>= 1; - base *= base; - } - return power; -} - -#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 - -/** ZSTD_rollingHash_append() : - * Add the buffer to the hash value. - */ -static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) -{ - BYTE const* istart = (BYTE const*)buf; - size_t pos; - for (pos = 0; pos < size; ++pos) { - hash *= prime8bytes; - hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; - } - return hash; -} - -/** ZSTD_rollingHash_compute() : - * Compute the rolling hash value of the buffer. - */ -MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) -{ - return ZSTD_rollingHash_append(0, buf, size); -} - -/** ZSTD_rollingHash_primePower() : - * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash - * over a window of length bytes. - */ -MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) -{ - return ZSTD_ipow(prime8bytes, length - 1); -} - -/** ZSTD_rollingHash_rotate() : - * Rotate the rolling hash by one byte. - */ -MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) -{ - hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; - hash *= prime8bytes; - hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; - return hash; -} - -/*-************************************* -* Round buffer management -***************************************/ -#if (ZSTD_WINDOWLOG_MAX_64 > 31) -# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" -#endif -/* Max current allowed */ -#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) -/* Maximum chunk size before overflow correction needs to be called again */ -#define ZSTD_CHUNKSIZE_MAX \ - ( ((U32)-1) /* Maximum ending current index */ \ - - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ - -/** - * ZSTD_window_clear(): - * Clears the window containing the history by simply setting it to empty. - */ -MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) -{ - size_t const endT = (size_t)(window->nextSrc - window->base); - U32 const end = (U32)endT; - - window->lowLimit = end; - window->dictLimit = end; -} - -/** - * ZSTD_window_hasExtDict(): - * Returns non-zero if the window has a non-empty extDict. - */ -MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) -{ - return window.lowLimit < window.dictLimit; -} - -/** - * ZSTD_matchState_dictMode(): - * Inspects the provided matchState and figures out what dictMode should be - * passed to the compressor. - */ -MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) -{ - return ZSTD_window_hasExtDict(ms->window) ? - ZSTD_extDict : - ms->dictMatchState != NULL ? - ZSTD_dictMatchState : - ZSTD_noDict; -} - -/** - * ZSTD_window_needOverflowCorrection(): - * Returns non-zero if the indices are getting too large and need overflow - * protection. - */ -MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, - void const* srcEnd) -{ - U32 const current = (U32)((BYTE const*)srcEnd - window.base); - return current > ZSTD_CURRENT_MAX; -} - -/** - * ZSTD_window_correctOverflow(): - * Reduces the indices to protect from index overflow. - * Returns the correction made to the indices, which must be applied to every - * stored index. - * - * The least significant cycleLog bits of the indices must remain the same, - * which may be 0. Every index up to maxDist in the past must be valid. - * NOTE: (maxDist & cycleMask) must be zero. - */ -MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, - U32 maxDist, void const* src) -{ - /* preemptive overflow correction: - * 1. correction is large enough: - * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) - * > 1<<29 - * - * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: - * After correction, current is less than (1<base < 1<<32. - * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); - U32 const currentCycle0 = current & cycleMask; - /* Exclude zero so that newCurrent - maxDist >= 1. */ - U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; - U32 const newCurrent = currentCycle1 + maxDist; - U32 const correction = current - newCurrent; - assert((maxDist & cycleMask) == 0); - assert(current > newCurrent); - /* Loose bound, should be around 1<<29 (see above) */ - assert(correction > 1<<28); - - window->base += correction; - window->dictBase += correction; - if (window->lowLimit <= correction) window->lowLimit = 1; - else window->lowLimit -= correction; - if (window->dictLimit <= correction) window->dictLimit = 1; - else window->dictLimit -= correction; - - /* Ensure we can still reference the full window. */ - assert(newCurrent >= maxDist); - assert(newCurrent - maxDist >= 1); - /* Ensure that lowLimit and dictLimit didn't underflow. */ - assert(window->lowLimit <= newCurrent); - assert(window->dictLimit <= newCurrent); - - DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, - window->lowLimit); - return correction; -} - -/** - * ZSTD_window_enforceMaxDist(): - * Updates lowLimit so that: - * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd - * - * It ensures index is valid as long as index >= lowLimit. - * This must be called before a block compression call. - * - * loadedDictEnd is only defined if a dictionary is in use for current compression. - * As the name implies, loadedDictEnd represents the index at end of dictionary. - * The value lies within context's referential, it can be directly compared to blockEndIdx. - * - * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. - * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. - * This is because dictionaries are allowed to be referenced fully - * as long as the last byte of the dictionary is in the window. - * Once input has progressed beyond window size, dictionary cannot be referenced anymore. - * - * In normal dict mode, the dictionary lies between lowLimit and dictLimit. - * In dictMatchState mode, lowLimit and dictLimit are the same, - * and the dictionary is below them. - * forceWindow and dictMatchState are therefore incompatible. - */ -MEM_STATIC void -ZSTD_window_enforceMaxDist(ZSTD_window_t* window, - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) -{ - U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; - DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - - /* - When there is no dictionary : loadedDictEnd == 0. - In which case, the test (blockEndIdx > maxDist) is merely to avoid - overflowing next operation `newLowLimit = blockEndIdx - maxDist`. - - When there is a standard dictionary : - Index referential is copied from the dictionary, - which means it starts from 0. - In which case, loadedDictEnd == dictSize, - and it makes sense to compare `blockEndIdx > maxDist + dictSize` - since `blockEndIdx` also starts from zero. - - When there is an attached dictionary : - loadedDictEnd is expressed within the referential of the context, - so it can be directly compared against blockEndIdx. - */ - if (blockEndIdx > maxDist + loadedDictEnd) { - U32 const newLowLimit = blockEndIdx - maxDist; - if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; - if (window->dictLimit < window->lowLimit) { - DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", - (unsigned)window->dictLimit, (unsigned)window->lowLimit); - window->dictLimit = window->lowLimit; - } - /* On reaching window size, dictionaries are invalidated */ - if (loadedDictEndPtr) *loadedDictEndPtr = 0; - if (dictMatchStatePtr) *dictMatchStatePtr = NULL; - } -} - -/* Similar to ZSTD_window_enforceMaxDist(), - * but only invalidates dictionary - * when input progresses beyond window size. - * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) - * loadedDictEnd uses same referential as window->base - * maxDist is the window size */ -MEM_STATIC void -ZSTD_checkDictValidity(const ZSTD_window_t* window, - const void* blockEnd, - U32 maxDist, - U32* loadedDictEndPtr, - const ZSTD_matchState_t** dictMatchStatePtr) -{ - assert(loadedDictEndPtr != NULL); - assert(dictMatchStatePtr != NULL); - { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); - U32 const loadedDictEnd = *loadedDictEndPtr; - DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", - (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); - assert(blockEndIdx >= loadedDictEnd); - - if (blockEndIdx > loadedDictEnd + maxDist) { - /* On reaching window size, dictionaries are invalidated. - * For simplification, if window size is reached anywhere within next block, - * the dictionary is invalidated for the full block. - */ - DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); - *loadedDictEndPtr = 0; - *dictMatchStatePtr = NULL; - } else { - if (*loadedDictEndPtr != 0) { - DEBUGLOG(6, "dictionary considered valid for current block"); - } } } -} - -MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { - memset(window, 0, sizeof(*window)); - window->base = (BYTE const*)""; - window->dictBase = (BYTE const*)""; - window->dictLimit = 1; /* start from 1, so that 1st position is valid */ - window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ - window->nextSrc = window->base + 1; /* see issue #1241 */ -} - -/** - * ZSTD_window_update(): - * Updates the window by appending [src, src + srcSize) to the window. - * If it is not contiguous, the current prefix becomes the extDict, and we - * forget about the extDict. Handles overlap of the prefix and extDict. - * Returns non-zero if the segment is contiguous. - */ -MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, - void const* src, size_t srcSize) -{ - BYTE const* const ip = (BYTE const*)src; - U32 contiguous = 1; - DEBUGLOG(5, "ZSTD_window_update"); - if (srcSize == 0) - return contiguous; - assert(window->base != NULL); - assert(window->dictBase != NULL); - /* Check if blocks follow each other */ - if (src != window->nextSrc) { - /* not contiguous */ - size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); - DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); - window->lowLimit = window->dictLimit; - assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ - window->dictLimit = (U32)distanceFromBase; - window->dictBase = window->base; - window->base = ip - distanceFromBase; - /* ms->nextToUpdate = window->dictLimit; */ - if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ - contiguous = 0; - } - window->nextSrc = ip + srcSize; - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ( (ip+srcSize > window->dictBase + window->lowLimit) - & (ip < window->dictBase + window->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; - window->lowLimit = lowLimitMax; - DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); - } - return contiguous; -} - -/** - * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. - */ -MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) -{ - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.lowLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; - return matchLowest; -} - -/** - * Returns the lowest allowed match index in the prefix. - */ -MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) -{ - U32 const maxDistance = 1U << windowLog; - U32 const lowestValid = ms->window.dictLimit; - U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; - U32 const isDictionary = (ms->loadedDictEnd != 0); - U32 const matchLowest = isDictionary ? lowestValid : withinWindow; - return matchLowest; -} - - - -/* debug functions */ -#if (DEBUGLEVEL>=2) - -MEM_STATIC double ZSTD_fWeight(U32 rawStat) -{ - U32 const fp_accuracy = 8; - U32 const fp_multiplier = (1 << fp_accuracy); - U32 const newStat = rawStat + 1; - U32 const hb = ZSTD_highbit32(newStat); - U32 const BWeight = hb * fp_multiplier; - U32 const FWeight = (newStat << fp_accuracy) >> hb; - U32 const weight = BWeight + FWeight; - assert(hb + fp_accuracy < 31); - return (double)weight / fp_multiplier; -} - -/* display a table content, - * listing each element, its frequency, and its predicted bit cost */ -MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) -{ - unsigned u, sum; - for (u=0, sum=0; u<=max; u++) sum += table[u]; - DEBUGLOG(2, "total nb elts: %u", sum); - for (u=0; u<=max; u++) { - DEBUGLOG(2, "%2u: %5u (%.2f)", - u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); - } -} - -#endif - -/* =============================================================== - * Shared internal declarations - * These prototypes may be called from sources not in lib/compress - * =============================================================== */ - -/* ZSTD_loadCEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * return : size of dictionary header (size of magic number + dict ID + entropy tables) - * assumptions : magic number supposed already checked - * and dictSize >= 8 */ -size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, - short* offcodeNCount, unsigned* offcodeMaxValue, - const void* const dict, size_t dictSize); - -void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); - -/* ============================================================== - * Private declarations - * These prototypes shall only be called from within lib/compress - * ============================================================== */ - -/* ZSTD_getCParamsFromCCtxParams() : - * cParams are built depending on compressionLevel, src size hints, - * LDM and manually set compression parameters. - * Note: srcSizeHint == 0 means 0! - */ -ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( - const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize); - -/*! ZSTD_initCStream_internal() : - * Private use only. Init streaming operation. - * expects params to be valid. - * must receive dict, or cdict, or none, but not both. - * @return : 0, or an error code */ -size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); - -void ZSTD_resetSeqStore(seqStore_t* ssPtr); - -/*! ZSTD_getCParamsFromCDict() : - * as the name implies */ -ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); - -/* ZSTD_compressBegin_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, - const void* dict, size_t dictSize, - ZSTD_dictContentType_e dictContentType, - ZSTD_dictTableLoadMethod_e dtlm, - const ZSTD_CDict* cdict, - const ZSTD_CCtx_params* params, - unsigned long long pledgedSrcSize); - -/* ZSTD_compress_advanced_internal() : - * Private use only. To be called from zstdmt_compress.c. */ -size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - const ZSTD_CCtx_params* params); - - -/* ZSTD_writeLastEmptyBlock() : - * output an empty Block with end-of-frame mark to complete a frame - * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) - * or an error code if `dstCapacity` is too small ( 1 */ -U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); - -} - -#endif /* ZSTD_COMPRESS_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h deleted file mode 100644 index 7082db522..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPRESS_LITERALS_H -#define ZSTD_COMPRESS_LITERALS_H - -#include "zstd/compress/zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ - -namespace duckdb_zstd { - -size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* entropyWorkspace, size_t entropyWorkspaceSize, - const int bmi2); - -} - -#endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h deleted file mode 100644 index 63e27ea63..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPRESS_SEQUENCES_H -#define ZSTD_COMPRESS_SEQUENCES_H - -#include "zstd/common/fse.h" /* FSE_repeat, FSE_CTable */ -#include "zstd/common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ - -namespace duckdb_zstd { - -typedef enum { - ZSTD_defaultDisallowed = 0, - ZSTD_defaultAllowed = 1 -} ZSTD_defaultPolicy_e; - -symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy); - -size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* entropyWorkspace, size_t entropyWorkspaceSize); - -size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); - -size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max); - -size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max); - -} - -#endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h deleted file mode 100644 index df4055036..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPRESS_ADVANCED_H -#define ZSTD_COMPRESS_ADVANCED_H - -/*-************************************* -* Dependencies -***************************************/ - -#include "zstd.h" /* ZSTD_CCtx */ - -namespace duckdb_zstd { -/*-************************************* -* Target Compressed Block Size -***************************************/ - -/* ZSTD_compressSuperBlock() : - * Used to compress a super block when targetCBlockSize is being used. - * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ -size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - void const* src, size_t srcSize, - unsigned lastBlock); -} - - -#endif /* ZSTD_COMPRESS_ADVANCED_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h deleted file mode 100644 index aea4f469c..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +++ /dev/null @@ -1,525 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_CWKSP_H -#define ZSTD_CWKSP_H - -/*-************************************* -* Dependencies -***************************************/ -#include "zstd/common/zstd_internal.h" - -/*-************************************* -* Constants -***************************************/ - -/* Since the workspace is effectively its own little malloc implementation / - * arena, when we run under ASAN, we should similarly insert redzones between - * each internal element of the workspace, so ASAN will catch overruns that - * reach outside an object but that stay inside the workspace. - * - * This defines the size of that redzone. - */ -#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE -#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 -#endif - -#if defined (MEMORY_SANITIZER) -#include -#endif - -namespace duckdb_zstd { - -/*-************************************* -* Structures -***************************************/ -typedef enum { - ZSTD_cwksp_alloc_objects, - ZSTD_cwksp_alloc_buffers, - ZSTD_cwksp_alloc_aligned -} ZSTD_cwksp_alloc_phase_e; - -/** - * Zstd fits all its internal datastructures into a single continuous buffer, - * so that it only needs to perform a single OS allocation (or so that a buffer - * can be provided to it and it can perform no allocations at all). This buffer - * is called the workspace. - * - * Several optimizations complicate that process of allocating memory ranges - * from this workspace for each internal datastructure: - * - * - These different internal datastructures have different setup requirements: - * - * - The static objects need to be cleared once and can then be trivially - * reused for each compression. - * - * - Various buffers don't need to be initialized at all--they are always - * written into before they're read. - * - * - The matchstate tables have a unique requirement that they don't need - * their memory to be totally cleared, but they do need the memory to have - * some bound, i.e., a guarantee that all values in the memory they've been - * allocated is less than some maximum value (which is the starting value - * for the indices that they will then use for compression). When this - * guarantee is provided to them, they can use the memory without any setup - * work. When it can't, they have to clear the area. - * - * - These buffers also have different alignment requirements. - * - * - We would like to reuse the objects in the workspace for multiple - * compressions without having to perform any expensive reallocation or - * reinitialization work. - * - * - We would like to be able to efficiently reuse the workspace across - * multiple compressions **even when the compression parameters change** and - * we need to resize some of the objects (where possible). - * - * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp - * abstraction was created. It works as follows: - * - * Workspace Layout: - * - * [ ... workspace ... ] - * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] - * - * The various objects that live in the workspace are divided into the - * following categories, and are allocated separately: - * - * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, - * so that literally everything fits in a single buffer. Note: if present, - * this must be the first object in the workspace, since ZSTD_free{CCtx, - * CDict}() rely on a pointer comparison to see whether one or two frees are - * required. - * - * - Fixed size objects: these are fixed-size, fixed-count objects that are - * nonetheless "dynamically" allocated in the workspace so that we can - * control how they're initialized separately from the broader ZSTD_CCtx. - * Examples: - * - Entropy Workspace - * - 2 x ZSTD_compressedBlockState_t - * - CDict dictionary contents - * - * - Tables: these are any of several different datastructures (hash tables, - * chain tables, binary trees) that all respect a common format: they are - * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). - * Their sizes depend on the cparams. - * - * - Aligned: these buffers are used for various purposes that require 4 byte - * alignment, but don't require any initialization before they're used. - * - * - Buffers: these buffers are used for various purposes that don't require - * any alignment or initialization before they're used. This means they can - * be moved around at no cost for a new compression. - * - * Allocating Memory: - * - * The various types of objects must be allocated in order, so they can be - * correctly packed into the workspace buffer. That order is: - * - * 1. Objects - * 2. Buffers - * 3. Aligned - * 4. Tables - * - * Attempts to reserve objects of different types out of order will fail. - */ -typedef struct { - void* workspace; - void* workspaceEnd; - - void* objectEnd; - void* tableEnd; - void* tableValidEnd; - void* allocStart; - - int allocFailed; - int workspaceOversizedDuration; - ZSTD_cwksp_alloc_phase_e phase; -} ZSTD_cwksp; - -/*-************************************* -* Functions -***************************************/ - -MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); - -MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { - (void)ws; - assert(ws->workspace <= ws->objectEnd); - assert(ws->objectEnd <= ws->tableEnd); - assert(ws->objectEnd <= ws->tableValidEnd); - assert(ws->tableEnd <= ws->allocStart); - assert(ws->tableValidEnd <= ws->allocStart); - assert(ws->allocStart <= ws->workspaceEnd); -} - -/** - * Align must be a power of 2. - */ -MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { - size_t const mask = align - 1; - assert((align & mask) == 0); - return (size + mask) & ~mask; -} - -/** - * Use this to determine how much space in the workspace we will consume to - * allocate this object. (Normally it should be exactly the size of the object, - * but under special conditions, like ASAN, where we pad each object, it might - * be larger.) - * - * Since tables aren't currently redzoned, you don't need to call through this - * to figure out how much space you need for the matchState tables. Everything - * else is though. - */ -MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#else - return size; -#endif -} - -MEM_STATIC void ZSTD_cwksp_internal_advance_phase( - ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { - assert(phase >= ws->phase); - if (phase > ws->phase) { - if (ws->phase < ZSTD_cwksp_alloc_buffers && - phase >= ZSTD_cwksp_alloc_buffers) { - ws->tableValidEnd = ws->objectEnd; - } - if (ws->phase < ZSTD_cwksp_alloc_aligned && - phase >= ZSTD_cwksp_alloc_aligned) { - /* If unaligned allocations down from a too-large top have left us - * unaligned, we need to realign our alloc ptr. Technically, this - * can consume space that is unaccounted for in the neededSpace - * calculation. However, I believe this can only happen when the - * workspace is too large, and specifically when it is too large - * by a larger margin than the space that will be consumed. */ - /* TODO: cleaner, compiler warning friendly way to do this??? */ - ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); - if (ws->allocStart < ws->tableValidEnd) { - ws->tableValidEnd = ws->allocStart; - } - } - ws->phase = phase; - } -} - -/** - * Returns whether this object/buffer/etc was allocated in this workspace. - */ -MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { - return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); -} - -/** - * Internal function. Do not use directly. - */ -MEM_STATIC void* ZSTD_cwksp_reserve_internal( - ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { - void* alloc; - void* bottom = ws->tableEnd; - ZSTD_cwksp_internal_advance_phase(ws, phase); - alloc = (BYTE *)ws->allocStart - bytes; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* over-reserve space */ - alloc = (BYTE *)alloc - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#endif - - DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(alloc >= bottom); - if (alloc < bottom) { - DEBUGLOG(4, "cwksp: alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - if (alloc < ws->tableValidEnd) { - ws->tableValidEnd = alloc; - } - ws->allocStart = alloc; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on - * either size. */ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; -} - -/** - * Reserves and returns unaligned memory. - */ -MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { - return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); -} - -/** - * Reserves and returns memory sized on and aligned on sizeof(unsigned). - */ -MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { - assert((bytes & (sizeof(U32)-1)) == 0); - return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); -} - -/** - * Aligned on sizeof(unsigned). These buffers have the special property that - * their values remain constrained, allowing us to re-use them without - * memset()-ing them. - */ -MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { - const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; - void* alloc = ws->tableEnd; - void* end = (BYTE *)alloc + bytes; - void* top = ws->allocStart; - - DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", - alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); - assert((bytes & (sizeof(U32)-1)) == 0); - ZSTD_cwksp_internal_advance_phase(ws, phase); - ZSTD_cwksp_assert_internal_consistency(ws); - assert(end <= top); - if (end > top) { - DEBUGLOG(4, "cwksp: table alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - ws->tableEnd = end; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; -} - -/** - * Aligned on sizeof(void*). - */ -MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { - size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); - void* alloc = ws->objectEnd; - void* end = (BYTE*)alloc + roundedBytes; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* over-reserve space */ - end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; -#endif - - DEBUGLOG(5, - "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", - alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); - assert(((size_t)alloc & (sizeof(void*)-1)) == 0); - assert((bytes & (sizeof(void*)-1)) == 0); - ZSTD_cwksp_assert_internal_consistency(ws); - /* we must be in the first phase, no advance is possible */ - if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { - DEBUGLOG(4, "cwksp: object alloc failed!"); - ws->allocFailed = 1; - return NULL; - } - ws->objectEnd = end; - ws->tableEnd = end; - ws->tableValidEnd = end; - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on - * either size. */ - alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; - __asan_unpoison_memory_region(alloc, bytes); -#endif - - return alloc; -} - -MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); - -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the table re-use logic is sound, and that we don't - * access table space that we haven't cleaned, we re-"poison" the table - * space every time we mark it dirty. */ - { - size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; - assert(__msan_test_shadow(ws->objectEnd, size) == -1); - __msan_poison(ws->objectEnd, size); - } -#endif - - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - ws->tableValidEnd = ws->objectEnd; - ZSTD_cwksp_assert_internal_consistency(ws); -} - -MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { - ws->tableValidEnd = ws->tableEnd; - } - ZSTD_cwksp_assert_internal_consistency(ws); -} - -/** - * Zero the part of the allocated tables not already marked clean. - */ -MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); - assert(ws->tableValidEnd >= ws->objectEnd); - assert(ws->tableValidEnd <= ws->allocStart); - if (ws->tableValidEnd < ws->tableEnd) { - memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); - } - ZSTD_cwksp_mark_tables_clean(ws); -} - -/** - * Invalidates table allocations. - * All other allocations remain valid. - */ -MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: clearing tables!"); - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { - size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; - __asan_poison_memory_region(ws->objectEnd, size); - } -#endif - - ws->tableEnd = ws->objectEnd; - ZSTD_cwksp_assert_internal_consistency(ws); -} - -/** - * Invalidates all buffer, aligned, and table allocations. - * Object allocations remain valid. - */ -MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { - DEBUGLOG(4, "cwksp: clearing!"); - -#if defined (MEMORY_SANITIZER) && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) - /* To validate that the context re-use logic is sound, and that we don't - * access stuff that this compression hasn't initialized, we re-"poison" - * the workspace (or at least the non-static, non-table parts of it) - * every time we start a new compression. */ - { - size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; - __msan_poison(ws->tableValidEnd, size); - } -#endif - -#if defined (ADDRESS_SANITIZER) && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) - { - size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; - __asan_poison_memory_region(ws->objectEnd, size); - } -#endif - - ws->tableEnd = ws->objectEnd; - ws->allocStart = ws->workspaceEnd; - ws->allocFailed = 0; - if (ws->phase > ZSTD_cwksp_alloc_buffers) { - ws->phase = ZSTD_cwksp_alloc_buffers; - } - ZSTD_cwksp_assert_internal_consistency(ws); -} - -/** - * The provided workspace takes ownership of the buffer [start, start+size). - * Any existing values in the workspace are ignored (the previously managed - * buffer, if present, must be separately freed). - */ -MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) { - DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); - assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ - ws->workspace = start; - ws->workspaceEnd = (BYTE*)start + size; - ws->objectEnd = ws->workspace; - ws->tableValidEnd = ws->objectEnd; - ws->phase = ZSTD_cwksp_alloc_objects; - ZSTD_cwksp_clear(ws); - ws->workspaceOversizedDuration = 0; - ZSTD_cwksp_assert_internal_consistency(ws); -} - -MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { - void* workspace = ZSTD_malloc(size, customMem); - DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); - RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); - ZSTD_cwksp_init(ws, workspace, size); - return 0; -} - -MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { - void *ptr = ws->workspace; - DEBUGLOG(4, "cwksp: freeing workspace"); - memset(ws, 0, sizeof(ZSTD_cwksp)); - ZSTD_free(ptr, customMem); -} - -/** - * Moves the management of a workspace from one cwksp to another. The src cwksp - * is left in an invalid state (src must be re-init()'ed before its used again). - */ -MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { - *dst = *src; - memset(src, 0, sizeof(ZSTD_cwksp)); -} - -MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); -} - -MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { - return ws->allocFailed; -} - -/*-************************************* -* Functions Checking Free Space -***************************************/ - -MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { - return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); -} - -MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; -} - -MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_check_available( - ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); -} - -MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { - return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) - && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; -} - -MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( - ZSTD_cwksp* ws, size_t additionalNeededSpace) { - if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { - ws->workspaceOversizedDuration++; - } else { - ws->workspaceOversizedDuration = 0; - } -} - -} - -#endif /* ZSTD_CWKSP_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h deleted file mode 100644 index 7991711fa..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_DOUBLE_FAST_H -#define ZSTD_DOUBLE_FAST_H - -#include "zstd/common/mem.h" /* U32 */ -#include "zstd/compress/zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ - -namespace duckdb_zstd { - -void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm); -size_t ZSTD_compressBlock_doubleFast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_doubleFast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_doubleFast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -} - -#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h deleted file mode 100644 index a75839a06..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_FAST_H -#define ZSTD_FAST_H - -#include "zstd/common/mem.h" /* U32 */ -#include "zstd/compress/zstd_compress_internal.h" - -namespace duckdb_zstd { - -void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm); -size_t ZSTD_compressBlock_fast( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_fast_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -} - -#endif /* ZSTD_FAST_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h deleted file mode 100644 index 555edbf1b..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_LAZY_H -#define ZSTD_LAZY_H - -#include "zstd/compress/zstd_compress_internal.h" - -namespace duckdb_zstd { - -U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); - -void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ - -size_t ZSTD_compressBlock_btlazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_greedy( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -size_t ZSTD_compressBlock_btlazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_greedy_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -size_t ZSTD_compressBlock_greedy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_lazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btlazy2_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -} - -#endif /* ZSTD_LAZY_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h deleted file mode 100644 index d2640c776..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_LDM_H -#define ZSTD_LDM_H - -#include "zstd/compress/zstd_compress_internal.h" /* ldmParams_t, U32 */ -#include "zstd.h" /* ZSTD_CCtx, size_t */ - -/*-************************************* -* Long distance matching -***************************************/ - -#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT - -namespace duckdb_zstd { - -void ZSTD_ldm_fillHashTable( - ldmState_t* state, const BYTE* ip, - const BYTE* iend, ldmParams_t const* params); - -/** - * ZSTD_ldm_generateSequences(): - * - * Generates the sequences using the long distance match finder. - * Generates long range matching sequences in `sequences`, which parse a prefix - * of the source. `sequences` must be large enough to store every sequence, - * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. - * @returns 0 or an error code. - * - * NOTE: The user must have called ZSTD_window_update() for all of the input - * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. - * NOTE: This function returns an error if it runs out of space to store - * sequences. - */ -size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeqStore_t* sequences, - ldmParams_t const* params, void const* src, size_t srcSize); - -/** - * ZSTD_ldm_blockCompress(): - * - * Compresses a block using the predefined sequences, along with a secondary - * block compressor. The literals section of every sequence is passed to the - * secondary block compressor, and those sequences are interspersed with the - * predefined sequences. Returns the length of the last literals. - * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. - * `rawSeqStore.seq` may also be updated to split the last sequence between two - * blocks. - * @return The length of the last literals. - * - * NOTE: The source must be at most the maximum block size, but the predefined - * sequences can be any size, and may be longer than the block. In the case that - * they are longer than the block, the last sequences may need to be split into - * two. We handle that case correctly, and update `rawSeqStore` appropriately. - * NOTE: This function does not return any errors. - */ -size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -/** - * ZSTD_ldm_skipSequences(): - * - * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. - * Avoids emitting matches less than `minMatch` bytes. - * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). - */ -void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, - U32 const minMatch); - - -/** ZSTD_ldm_getTableSize() : - * Estimate the space needed for long distance matching tables or 0 if LDM is - * disabled. - */ -size_t ZSTD_ldm_getTableSize(ldmParams_t params); - -/** ZSTD_ldm_getSeqSpace() : - * Return an upper bound on the number of sequences that can be produced by - * the long distance matcher, or 0 if LDM is disabled. - */ -size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); - -/** ZSTD_ldm_adjustParameters() : - * If the params->hashRateLog is not set, set it to its default value based on - * windowLog and params->hashLog. - * - * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to - * params->hashLog if it is not). - * - * Ensures that the minMatchLength >= targetLength during optimal parsing. - */ -void ZSTD_ldm_adjustParameters(ldmParams_t* params, - ZSTD_compressionParameters const* cParams); - -} - -#endif /* ZSTD_FAST_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h b/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h deleted file mode 100644 index b0d7bc3e2..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_OPT_H -#define ZSTD_OPT_H - -#include "zstd/compress/zstd_compress_internal.h" - -namespace duckdb_zstd { - -/* used in ZSTD_loadDictionaryContent() */ -void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); - -size_t ZSTD_compressBlock_btopt( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra2( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - - -size_t ZSTD_compressBlock_btopt_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra_dictMatchState( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - -size_t ZSTD_compressBlock_btopt_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); -size_t ZSTD_compressBlock_btultra_extDict( - ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], - void const* src, size_t srcSize); - - /* note : no btultra2 variant for extDict nor dictMatchState, - * because btultra2 is not meant to work with dictionaries - * and is only specific for the first block (no prefix) */ - -} - -#endif /* ZSTD_OPT_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h b/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h deleted file mode 100644 index a95f384fb..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#ifndef ZSTD_DDICT_H -#define ZSTD_DDICT_H - -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ -#include "zstd.h" /* ZSTD_DDict, and several public functions */ - -namespace duckdb_zstd { -/*-******************************************************* - * Interface - *********************************************************/ - -/* note: several prototypes are already published in `zstd.h` : - * ZSTD_createDDict() - * ZSTD_createDDict_byReference() - * ZSTD_createDDict_advanced() - * ZSTD_freeDDict() - * ZSTD_initStaticDDict() - * ZSTD_sizeof_DDict() - * ZSTD_estimateDDictSize() - * ZSTD_getDictID_fromDict() - */ - -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); - -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - -} - -#endif /* ZSTD_DDICT_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h b/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h deleted file mode 100644 index 46d4a2103..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -#ifndef ZSTD_DEC_BLOCK_H -#define ZSTD_DEC_BLOCK_H - -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ -#include "zstd.h" /* DCtx, and some public functions */ -#include "zstd/common/zstd_internal.h" /* blockProperties_t, and some public functions */ -#include "zstd/decompress/zstd_decompress_internal.h" /* ZSTD_seqSymbol */ - -namespace duckdb_zstd { - -/* === Prototypes === */ - -/* note: prototypes already published within `zstd.h` : - * ZSTD_decompressBlock() - */ - -/* note: prototypes already published within `zstd_internal.h` : - * ZSTD_getcBlockSize() - * ZSTD_decodeSeqHeaders() - */ - - -/* ZSTD_decompressBlock_internal() : - * decompress block, starting at `src`, - * into destination buffer `dst`. - * @return : decompressed block size, - * or an error code (which can be tested using ZSTD_isError()) - */ -size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, const int frame); - -/* ZSTD_buildFSETable() : - * generate FSE decoding table for one symbol (ll, ml or off) - * this function must be called with valid parameters only - * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) - * in which case it cannot fail. - * Internal use only. - */ -void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, - const short* normalizedCounter, unsigned maxSymbolValue, - const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); - -} - -#endif /* ZSTD_DEC_BLOCK_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h b/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h deleted file mode 100644 index 6ff422e2c..000000000 --- a/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - - -/* zstd_decompress_internal: - * objects and definitions shared within lib/decompress modules */ - - #ifndef ZSTD_DECOMPRESS_INTERNAL_H - #define ZSTD_DECOMPRESS_INTERNAL_H - - -/*-******************************************************* - * Dependencies - *********************************************************/ -#include "zstd/common/mem.h" /* BYTE, U16, U32 */ -#include "zstd/common/zstd_internal.h" /* ZSTD_seqSymbol */ - -namespace duckdb_zstd { - -/*-******************************************************* - * Constants - *********************************************************/ -struct ZSTDConstants { - static const U32 LL_base[MaxLL+1]; - static const U32 OF_base[MaxOff+1]; - static const U32 OF_bits[MaxOff+1]; - static const U32 ML_base[MaxML+1]; -}; - - -/*-******************************************************* - * Decompression types - *********************************************************/ - typedef struct { - U32 fastMode; - U32 tableLog; - } ZSTD_seqSymbol_header; - - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; - - #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) - -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; - -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; - -typedef enum { zdss_init=0, zdss_loadHeader, - zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; - -typedef enum { - ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ - ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ - ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ -} ZSTD_dictUses_e; - -typedef enum { - ZSTD_obm_buffered = 0, /* Buffer the output */ - ZSTD_obm_stable = 1 /* ZSTD_outBuffer is stable */ -} ZSTD_outBufferMode_e; - -struct ZSTD_DCtx_s -{ - const ZSTD_seqSymbol* LLTptr; - const ZSTD_seqSymbol* MLTptr; - const ZSTD_seqSymbol* OFTptr; - const HUF_DTable* HUFptr; - ZSTD_entropyDTables_t entropy; - U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ - const void* previousDstEnd; /* detect continuity */ - const void* prefixStart; /* start of current segment */ - const void* virtualStart; /* virtual start of previous segment if it was just before current one */ - const void* dictEnd; /* end of previous segment */ - size_t expected; - ZSTD_frameHeader fParams; - U64 decodedSize; - blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - XXH64_state_t xxhState; - size_t headerSize; - ZSTD_format_e format; - const BYTE* litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - size_t staticSize; - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - - /* dictionary */ - ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ - U32 dictID; - int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ - ZSTD_dictUses_e dictUses; - - /* streaming */ - ZSTD_dStreamStage streamStage; - char* inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char* outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; - int noForwardProgress; - ZSTD_outBufferMode_e outBufferMode; - ZSTD_outBuffer expectedOutBuffer; - - /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; - - size_t oversizedDuration; - -#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - void const* dictContentBeginForFuzzing; - void const* dictContentEndForFuzzing; -#endif -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ - - -/*-******************************************************* - * Shared internal functions - *********************************************************/ - -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); - -/*! ZSTD_checkContinuity() : - * check if next `dst` follows previous position, where decompression ended. - * If yes, do nothing (continue on current segment). - * If not, classify previous segment as "external dictionary", and start a new segment. - * This function cannot fail. */ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); - -} - -#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/src/duckdb/third_party/zstd/include/zstd_static.h b/src/duckdb/third_party/zstd/include/zstd_static.h deleted file mode 100644 index e4f1f3b05..000000000 --- a/src/duckdb/third_party/zstd/include/zstd_static.h +++ /dev/null @@ -1,1070 +0,0 @@ - -/* ************************************************************************************** - * ADVANCED AND EXPERIMENTAL FUNCTIONS - **************************************************************************************** - * The definitions in the following section are considered experimental. - * They are provided for advanced scenarios. - * They should never be used with a dynamic library, as prototypes may change in the future. - * Use them only in association with static linking. - * ***************************************************************************************/ - -#ifndef ZSTD_H_ZSTD_STATIC_LINKING_ONLY -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - -namespace duckdb_zstd { - -/**************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** - * The following symbols and constants - * are not planned to join "stable API" status in the near future. - * They can still change in future versions. - * Some of them are planned to remain in the static_only section indefinitely. - * Some of them might be removed in the future (especially when redundant with existing stable functions) - * ***************************************************************************************/ - -#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ -#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ -#define ZSTD_SKIPPABLEHEADERSIZE 8 - -/* compression parameter bounds */ -#define ZSTD_WINDOWLOG_MAX_32 30 -#define ZSTD_WINDOWLOG_MAX_64 31 -#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX_32 29 -#define ZSTD_CHAINLOG_MAX_64 30 -#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX -#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ -#define ZSTD_STRATEGY_MIN ZSTD_fast -#define ZSTD_STRATEGY_MAX ZSTD_btultra2 - - -#define ZSTD_OVERLAPLOG_MIN 0 -#define ZSTD_OVERLAPLOG_MAX 9 - -#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame - * requiring larger than (1< 3, then this is seqDef.offset - 3 - * If seqDef.offset < 3, then this is the corresponding repeat offset - * But if seqDef.offset < 3 and litLength == 0, this is the - * repeat offset before the corresponding repeat offset - * And if seqDef.offset == 3 and litLength == 0, this is the - * most recent repeat offset - 1 - */ - unsigned int offset; - unsigned int litLength; /* Literal length */ - unsigned int matchLength; /* Match length */ - /* 0 when seq not rep and seqDef.offset otherwise - * when litLength == 0 this will be <= 4, otherwise <= 3 like normal - */ - unsigned int rep; -} ZSTD_Sequence; - -typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ -} ZSTD_compressionParameters; - -typedef struct { - int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ - int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ -} ZSTD_frameParameters; - -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; - -typedef enum { - ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ - ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ - ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ -} ZSTD_dictContentType_e; - -typedef enum { - ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ -} ZSTD_dictLoadMethod_e; - -typedef enum { - ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ - ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. - * Useful to save 4 bytes per generated frame. - * Decoder cannot recognise automatically this format, requiring this instruction. */ -} ZSTD_format_e; - -typedef enum { - /* Note: this enum and the behavior it controls are effectively internal - * implementation details of the compressor. They are expected to continue - * to evolve and should be considered only in the context of extremely - * advanced performance tuning. - * - * Zstd currently supports the use of a CDict in three ways: - * - * - The contents of the CDict can be copied into the working context. This - * means that the compression can search both the dictionary and input - * while operating on a single set of internal tables. This makes - * the compression faster per-byte of input. However, the initial copy of - * the CDict's tables incurs a fixed cost at the beginning of the - * compression. For small compressions (< 8 KB), that copy can dominate - * the cost of the compression. - * - * - The CDict's tables can be used in-place. In this model, compression is - * slower per input byte, because the compressor has to search two sets of - * tables. However, this model incurs no start-up cost (as long as the - * working context's tables can be reused). For small inputs, this can be - * faster than copying the CDict's tables. - * - * - The CDict's tables are not used at all, and instead we use the working - * context alone to reload the dictionary and use params based on the source - * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). - * This method is effective when the dictionary sizes are very small relative - * to the input size, and the input size is fairly large to begin with. - * - * Zstd has a simple internal heuristic that selects which strategy to use - * at the beginning of a compression. However, if experimentation shows that - * Zstd is making poor choices, it is possible to override that choice with - * this enum. - */ - ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ - ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ - ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ -} ZSTD_dictAttachPref_e; - -typedef enum { - ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. - * Negative compression levels will be uncompressed, and positive compression - * levels will be compressed. */ - ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be - * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ -} ZSTD_literalCompressionMode_e; - - -/*************************************** -* Frame size functions -***************************************/ - -/*! ZSTD_findDecompressedSize() : - * `src` should point to the start of a series of ZSTD encoded and/or skippable frames - * `srcSize` must be the _exact_ size of this series - * (i.e. there should be a frame boundary at `src + srcSize`) - * @return : - decompressed size of all data in all successive frames - * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN - * - if an error occurred: ZSTD_CONTENTSIZE_ERROR - * - * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * note 2 : decompressed size is always present when compression is done with ZSTD_compress() - * note 3 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure result fits within application's authorized limits. - * Each application can set its own limits. - * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to - * read each contained frame header. This is fast as most of the data is skipped, - * however it does mean that all frame data must be present and valid. */ -ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); - -/*! ZSTD_decompressBound() : - * `src` should point to the start of a series of ZSTD encoded and/or skippable frames - * `srcSize` must be the _exact_ size of this series - * (i.e. there should be a frame boundary at `src + srcSize`) - * @return : - upper-bound for the decompressed size of all data in all successive frames - * - if an error occured: ZSTD_CONTENTSIZE_ERROR - * - * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. - * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. - * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. - * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: - * upper-bound = # blocks * min(128 KB, Window_Size) - */ -ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); - -/*! ZSTD_frameHeaderSize() : - * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); - -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store - * zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2 - * @return : number of sequences extracted - */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); - - -/*************************************** -* Memory management -***************************************/ - -/*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage - * of a future {D,C}Ctx, before its creation. - * - * ZSTD_estimateCCtxSize() will provide a memory budget large enough - * for any compression level up to selected one. - * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate - * does not include space for a window buffer. - * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. - * The estimate will assume the input may be arbitrarily large, - * which is the worst case. - * - * When srcSize can be bound by a known and rather "small" value, - * this fact can be used to provide a tighter estimation - * because the CCtx compression context will need less memory. - * This tighter estimation can be provided by more advanced functions - * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), - * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). - * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. - * - * Note 2 : only single-threaded compression is supported. - * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. - */ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); - -/*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); - -/*! ZSTD_estimate?DictSize() : - * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). - * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). - * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - */ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); - -/*! ZSTD_initStatic*() : - * Initialize an object using a pre-allocated fixed-size buffer. - * workspace: The memory area to emplace the object into. - * Provided pointer *must be 8-bytes aligned*. - * Buffer must outlive object. - * workspaceSize: Use ZSTD_estimate*Size() to determine - * how large workspace must be to support target scenario. - * @return : pointer to object (same address as workspace, just different type), - * or NULL if error (size too small, incorrect alignment, etc.) - * Note : zstd will never resize nor malloc() when using a static buffer. - * If the object requires more memory than available, - * zstd will just error out (typically ZSTD_error_memory_allocation). - * Note 2 : there is no corresponding "free" function. - * Since workspace is allocated externally, it must be freed externally too. - * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level - * into its associated cParams. - * Limitation 1 : currently not compatible with internal dictionary creation, triggered by - * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). - * Limitation 2 : static cctx currently not compatible with multi-threading. - * Limitation 3 : static dctx is incompatible with legacy support. - */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ - -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ - -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams); - -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType); - - -/*! Custom memory allocation : - * These prototypes make it possible to pass your own allocation/free functions. - * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. - * All allocation/free operations will be completed using these custom variants instead of regular ones. - */ -typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; - -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); - -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, - ZSTD_customMem customMem); - -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); - - - -/*************************************** -* Advanced compression functions -***************************************/ - -/*! ZSTD_createCDict_byReference() : - * Create a digested dictionary for compression - * Dictionary content is just referenced, not duplicated. - * As a consequence, `dictBuffer` **must** outlive CDict, - * and its content must remain unmodified throughout the lifetime of CDict. - * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); - -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - * `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); - -/*! ZSTD_getParams() : - * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. - * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); - -/*! ZSTD_checkCParams() : - * Ensure param values remain within authorized range. - * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); - -/*! ZSTD_adjustCParams() : - * optimize params for a given `srcSize` and `dictSize`. - * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. - * `dictSize` must be `0` when there is no dictionary. - * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. - * This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); - -/*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); - -/*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams); - - -/*! ZSTD_CCtx_loadDictionary_byReference() : - * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. - * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - -/*! ZSTD_CCtx_loadDictionary_advanced() : - * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); - -/*! ZSTD_CCtx_refPrefix_advanced() : - * Same as ZSTD_CCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); - -/* === experimental parameters === */ -/* these parameters can be used with ZSTD_setParameter() - * they are not guaranteed to remain supported in the future */ - - /* Enables rsyncable mode, - * which makes compressed files more rsync friendly - * by adding periodic synchronization points to the compressed data. - * The target average block size is ZSTD_c_jobSize / 2. - * It's possible to modify the job size to increase or decrease - * the granularity of the synchronization point. - * Once the jobSize is smaller than the window size, - * it will result in compression ratio degradation. - * NOTE 1: rsyncable mode only works when multithreading is enabled. - * NOTE 2: rsyncable performs poorly in combination with long range mode, - * since it will decrease the effectiveness of synchronization points, - * though mileage may vary. - * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. - * If the selected compression level is already running significantly slower, - * the overall speed won't be significantly impacted. - */ - #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 - -/* Select a compression format. - * The value must be of type ZSTD_format_e. - * See ZSTD_format_e enum definition for details */ -#define ZSTD_c_format ZSTD_c_experimentalParam2 - -/* Force back-reference distances to remain < windowSize, - * even when referencing into Dictionary content (default:0) */ -#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 - -/* Controls whether the contents of a CDict - * are used in place, or copied into the working context. - * Accepts values from the ZSTD_dictAttachPref_e enum. - * See the comments on that enum for an explanation of the feature. */ -#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 - -/* Controls how the literals are compressed (default is auto). - * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. - */ -#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 - -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 - -/* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. - * There is no guarantee that hint is close to actual source size, - * but compression ratio may regress significantly if guess considerably underestimates */ -#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 - -/*! ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. - * @return : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); - - -/*! ZSTD_CCtx_params : - * Quick howto : - * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into - * an existing ZSTD_CCtx_params structure. - * This is similar to - * ZSTD_CCtx_setParameter(). - * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to - * an existing CCtx. - * These parameters will be applied to - * all subsequent frames. - * - ZSTD_compressStream2() : Do compression using the CCtx. - * - ZSTD_freeCCtxParams() : Free the memory. - * - * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() - * for static allocation of CCtx for single-threaded compression. - */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); - -/*! ZSTD_CCtxParams_reset() : - * Reset params to default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); - -/*! ZSTD_CCtxParams_init() : - * Initializes the compression parameters of cctxParams according to - * compression level. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); - -/*! ZSTD_CCtxParams_init_advanced() : - * Initializes the compression and frame parameters of cctxParams according to - * params. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); - -/*! ZSTD_CCtxParams_setParameter() : - * Similar to ZSTD_CCtx_setParameter. - * Set one compression parameter, selected by enum ZSTD_cParameter. - * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); - -/*! ZSTD_CCtxParams_getParameter() : - * Similar to ZSTD_CCtx_getParameter. - * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); - -/*! ZSTD_CCtx_setParametersUsingCCtxParams() : - * Apply a set of ZSTD_CCtx_params to the compression context. - * This can be done even after compression is started, - * if nbWorkers==0, this will have no impact until a new compression is started. - * if nbWorkers>=1, new parameters will be picked up at next job, - * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); - -/*! ZSTD_compressStream2_simpleArgs() : - * Same as ZSTD_compressStream2(), - * but using only integral types as arguments. - * This variant might be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp); - - -/*************************************** -* Advanced decompression functions -***************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); - -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * Dictionary content is referenced, and therefore stays in dictBuffer. - * It is important that dictBuffer outlives DDict, - * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); - -/*! ZSTD_DCtx_loadDictionary_byReference() : - * Same as ZSTD_DCtx_loadDictionary(), - * but references `dict` content instead of copying it into `dctx`. - * This saves memory if `dict` remains around., - * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); - -/*! ZSTD_DCtx_loadDictionary_advanced() : - * Same as ZSTD_DCtx_loadDictionary(), - * but gives direct control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); - -/*! ZSTD_DCtx_refPrefix_advanced() : - * Same as ZSTD_DCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); - -/*! ZSTD_DCtx_setMaxWindowSize() : - * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. - * This protects a decoder context from reserving too much memory for itself (potential attack scenario). - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); - -/* ZSTD_d_format - * experimental parameter, - * allowing selection between ZSTD_format_e input compression formats - */ -#define ZSTD_d_format ZSTD_d_experimentalParam1 -/* ZSTD_d_stableOutBuffer - * Experimental parameter. - * Default is 0 == disabled. Set to 1 to enable. - * - * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same - * between calls, except for the modifications that zstd makes to pos (the - * caller must not modify pos). This is checked by the decompressor, and - * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer - * MUST be large enough to fit the entire decompressed frame. This will be - * checked when the frame content size is known. The data in the ZSTD_outBuffer - * in the range [dst, dst + pos) MUST not be modified during decompression - * or you will get data corruption. - * - * When this flags is enabled zstd won't allocate an output buffer, because - * it can write directly to the ZSTD_outBuffer, but it will still allocate - * an input buffer large enough to fit any compressed block. This will also - * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. - * If you need to avoid the input buffer allocation use the buffer-less - * streaming API. - * - * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using - * this flag is ALWAYS memory safe, and will never access out-of-bounds - * memory. However, decompression WILL fail if you violate the preconditions. - * - * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST - * not be modified during decompression or you will get data corruption. This - * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate - * matches. Normally zstd maintains its own buffer for this purpose, but passing - * this flag tells zstd to use the user provided buffer. - */ -#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 - -/*! ZSTD_DCtx_setFormat() : - * Instruct the decoder context about what kind of data to decode next. - * This instruction is mandatory to decode data without a fully-formed header, - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); - -/*! ZSTD_decompressStream_simpleArgs() : - * Same as ZSTD_decompressStream(), - * but using only integral types as arguments. - * This can be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos); - - -/******************************************************************** -* Advanced streaming functions -* Warning : most of these functions are now redundant with the Advanced API. -* Once Advanced API reaches "stable" status, -* redundant functions will be deprecated, and then at some point removed. -********************************************************************/ - -/*===== Advanced Streaming compression functions =====*/ -/**! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * pledgedSrcSize must be correct. If it is not known at init time, use - * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, - * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); - -/**! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * Creates of an internal CDict (incompatible with static CCtx), except if - * dict == NULL or dictSize < 8, in which case no dict is used. - * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if - * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); - -/**! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. - * pledgedSrcSize must be correct. - * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize); - -/**! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); - -/**! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. - * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. - * pledgedSrcSize must be correct. If srcSize is not known at init time, use - * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize); - -/*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); - - -typedef struct { - unsigned long long ingested; /* nb input bytes read and buffered */ - unsigned long long consumed; /* nb input bytes actually compressed */ - unsigned long long produced; /* nb of compressed bytes generated and buffered */ - unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ - unsigned currentJobID; /* MT only : latest started job nb */ - unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ -} ZSTD_frameProgression; - -/* ZSTD_getFrameProgression() : - * tells how much data has been ingested (read from input) - * consumed (input actually compressed) and produced (output) for current frame. - * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. - * Aggregates progression inside active worker threads. - */ -ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); - -/*! ZSTD_toFlushNow() : - * Tell how many bytes are ready to be flushed immediately. - * Useful for multithreading scenarios (nbWorkers >= 1). - * Probe the oldest active job, defined as oldest job not yet entirely flushed, - * and check its output buffer. - * @return : amount of data stored in oldest job and ready to be flushed immediately. - * if @return == 0, it means either : - * + there is no active job (could be checked with ZSTD_frameProgression()), or - * + oldest job is still actively compressing data, - * but everything it has produced has also been flushed so far, - * therefore flush speed is limited by production speed of oldest job - * irrespective of the speed of concurrent (and newer) jobs. - */ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); - - -/*===== Advanced Streaming decompression functions =====*/ -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); - -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); - -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * - * re-use decompression parameters from previous init; saves dictionary loading - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); - - -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. -********************************************************************* */ - -/** - Buffer-less streaming compression (synchronous mode) - - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. - ZSTD_CCtx object can be re-used multiple times within successive compression operations. - - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() - - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. - - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. - - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. - - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. -*/ - -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ - -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - - -/*- - Buffer-less streaming decompression (synchronous mode) - - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. - - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). - Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. - For example, do not allocate memory blindly, check that `windowSize` is within expectation. - Each application can set its own limits, depending on local restrictions. - For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. - - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference distance. - There are multiple ways to guarantee this condition. - - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). - In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. - At which point, decoding can resume from the beginning of the buffer. - Note that already decoded data stored in the buffer should be flushed before being overwritten. - - There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. - - Finally, if you control the compression process, you can also ignore all buffer size rules, - as long as the encoder and decoder progress in "lock-step", - aka use exactly the same buffer sizes, break contiguity at the same place, etc. - - Once buffers are setup, start decompression, with ZSTD_decompressBegin(). - If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). - - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. - - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). - - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. - - == Special case : skippable frames == - - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by decompressor. - The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. - For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. -*/ - -/*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; - -/*! ZSTD_getFrameHeader() : - * decode Frame Header, or requires larger `srcSize`. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*! ZSTD_getFrameHeader_advanced() : - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ - -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); - -/* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); - - - - -/* ============================ */ -/** Block level API */ -/* ============================ */ - -/*! - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. - - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. - Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! - ===> In which case, nothing is produced into `dst` ! - + User __must__ test for such outcome and deal directly with uncompressed data - + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. - Doing so would mess up with statistics history, leading to potential data corruption. - + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! - + In case of multiple successive blocks, should some of them be uncompressed, - decoder must be informed of their existence in order to follow proper history. - Use ZSTD_insertBlock() for such a case. -*/ - -/*===== Raw zstd block functions =====*/ -ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ - -} - -#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ diff --git a/src/include/sources.mk b/src/include/sources.mk index 411696555..5d13a21e1 100644 --- a/src/include/sources.mk +++ b/src/include/sources.mk @@ -1 +1 @@ -SOURCES=duckdb/ub_src_catalog.o duckdb/ub_src_catalog_catalog_entry.o duckdb/ub_src_catalog_default.o duckdb/ub_src_common_adbc.o duckdb/ub_src_common_adbc_nanoarrow.o duckdb/ub_src_common.o duckdb/ub_src_common_arrow_appender.o duckdb/ub_src_common_arrow.o duckdb/ub_src_common_crypto.o duckdb/ub_src_common_enums.o duckdb/ub_src_common_operator.o duckdb/ub_src_common_progress_bar.o duckdb/ub_src_common_row_operations.o duckdb/ub_src_common_serializer.o duckdb/ub_src_common_sort.o duckdb/ub_src_common_types.o duckdb/ub_src_common_types_column.o duckdb/ub_src_common_types_row.o duckdb/ub_src_common_value_operations.o duckdb/src/common/vector_operations/boolean_operators.o duckdb/src/common/vector_operations/comparison_operators.o duckdb/src/common/vector_operations/generators.o duckdb/src/common/vector_operations/is_distinct_from.o duckdb/src/common/vector_operations/null_operations.o duckdb/src/common/vector_operations/numeric_inplace_operators.o duckdb/src/common/vector_operations/vector_cast.o duckdb/src/common/vector_operations/vector_copy.o duckdb/src/common/vector_operations/vector_hash.o duckdb/src/common/vector_operations/vector_storage.o duckdb/ub_src_core_functions_aggregate_algebraic.o duckdb/ub_src_core_functions_aggregate_distributive.o duckdb/ub_src_core_functions_aggregate_holistic.o duckdb/ub_src_core_functions_aggregate_nested.o duckdb/ub_src_core_functions_aggregate_regression.o duckdb/ub_src_core_functions.o duckdb/ub_src_core_functions_scalar_bit.o duckdb/ub_src_core_functions_scalar_blob.o duckdb/ub_src_core_functions_scalar_date.o duckdb/ub_src_core_functions_scalar_debug.o duckdb/ub_src_core_functions_scalar_enum.o duckdb/ub_src_core_functions_scalar_generic.o duckdb/ub_src_core_functions_scalar_list.o duckdb/ub_src_core_functions_scalar_map.o duckdb/ub_src_core_functions_scalar_math.o duckdb/ub_src_core_functions_scalar_operators.o duckdb/ub_src_core_functions_scalar_random.o duckdb/ub_src_core_functions_scalar_string.o duckdb/ub_src_core_functions_scalar_struct.o duckdb/ub_src_core_functions_scalar_union.o duckdb/ub_src_execution.o duckdb/ub_src_execution_expression_executor.o duckdb/ub_src_execution_index_art.o duckdb/ub_src_execution_nested_loop_join.o duckdb/ub_src_execution_operator_aggregate.o duckdb/ub_src_execution_operator_csv_scanner.o duckdb/ub_src_execution_operator_csv_scanner_sniffer.o duckdb/ub_src_execution_operator_filter.o duckdb/ub_src_execution_operator_helper.o duckdb/ub_src_execution_operator_join.o duckdb/ub_src_execution_operator_order.o duckdb/ub_src_execution_operator_persistent.o duckdb/ub_src_execution_operator_projection.o duckdb/ub_src_execution_operator_scan.o duckdb/ub_src_execution_operator_schema.o duckdb/ub_src_execution_operator_set.o duckdb/ub_src_execution_physical_plan.o duckdb/ub_src_function_aggregate_distributive.o duckdb/ub_src_function_aggregate.o duckdb/ub_src_function.o duckdb/ub_src_function_cast.o duckdb/ub_src_function_pragma.o duckdb/ub_src_function_scalar_compressed_materialization.o duckdb/ub_src_function_scalar.o duckdb/ub_src_function_scalar_generic.o duckdb/ub_src_function_scalar_list.o duckdb/ub_src_function_scalar_operators.o duckdb/ub_src_function_scalar_sequence.o duckdb/ub_src_function_scalar_string.o duckdb/ub_src_function_scalar_string_regexp.o duckdb/ub_src_function_scalar_struct.o duckdb/ub_src_function_scalar_system.o duckdb/ub_src_function_table_arrow.o duckdb/ub_src_function_table.o duckdb/ub_src_function_table_system.o duckdb/ub_src_function_table_version.o duckdb/ub_src_main.o duckdb/ub_src_main_capi.o duckdb/ub_src_main_capi_cast.o duckdb/ub_src_main_chunk_scan_state.o duckdb/ub_src_main_extension.o duckdb/ub_src_main_relation.o duckdb/ub_src_main_settings.o duckdb/ub_src_optimizer.o duckdb/ub_src_optimizer_compressed_materialization.o duckdb/ub_src_optimizer_join_order.o duckdb/ub_src_optimizer_matcher.o duckdb/ub_src_optimizer_pullup.o duckdb/ub_src_optimizer_pushdown.o duckdb/ub_src_optimizer_rule.o duckdb/ub_src_optimizer_statistics_expression.o duckdb/ub_src_optimizer_statistics_operator.o duckdb/ub_src_parallel.o duckdb/ub_src_parser.o duckdb/ub_src_parser_constraints.o duckdb/ub_src_parser_expression.o duckdb/ub_src_parser_parsed_data.o duckdb/ub_src_parser_query_node.o duckdb/ub_src_parser_statement.o duckdb/ub_src_parser_tableref.o duckdb/ub_src_parser_transform_constraint.o duckdb/ub_src_parser_transform_expression.o duckdb/ub_src_parser_transform_helpers.o duckdb/ub_src_parser_transform_statement.o duckdb/ub_src_parser_transform_tableref.o duckdb/ub_src_planner.o duckdb/ub_src_planner_binder_expression.o duckdb/ub_src_planner_binder_query_node.o duckdb/ub_src_planner_binder_statement.o duckdb/ub_src_planner_binder_tableref.o duckdb/ub_src_planner_expression.o duckdb/ub_src_planner_expression_binder.o duckdb/ub_src_planner_filter.o duckdb/ub_src_planner_operator.o duckdb/ub_src_planner_parsed_data.o duckdb/ub_src_planner_subquery.o duckdb/ub_src_storage.o duckdb/ub_src_storage_buffer.o duckdb/ub_src_storage_checkpoint.o duckdb/ub_src_storage_compression.o duckdb/ub_src_storage_compression_chimp.o duckdb/ub_src_storage_metadata.o duckdb/ub_src_storage_serialization.o duckdb/ub_src_storage_statistics.o duckdb/ub_src_storage_table.o duckdb/ub_src_transaction.o duckdb/src/verification/copied_statement_verifier.o duckdb/src/verification/deserialized_statement_verifier.o duckdb/src/verification/deserialized_statement_verifier_v2.o duckdb/src/verification/external_statement_verifier.o duckdb/src/verification/no_operator_caching_verifier.o duckdb/src/verification/parsed_statement_verifier.o duckdb/src/verification/prepared_statement_verifier.o duckdb/src/verification/statement_verifier.o duckdb/src/verification/unoptimized_statement_verifier.o duckdb/third_party/fmt/format.o duckdb/third_party/fsst/fsst_avx512.o duckdb/third_party/fsst/libfsst.o duckdb/third_party/miniz/miniz.o duckdb/third_party/re2/re2/bitstate.o duckdb/third_party/re2/re2/compile.o duckdb/third_party/re2/re2/dfa.o duckdb/third_party/re2/re2/filtered_re2.o duckdb/third_party/re2/re2/mimics_pcre.o duckdb/third_party/re2/re2/nfa.o duckdb/third_party/re2/re2/onepass.o duckdb/third_party/re2/re2/parse.o duckdb/third_party/re2/re2/perl_groups.o duckdb/third_party/re2/re2/prefilter.o duckdb/third_party/re2/re2/prefilter_tree.o duckdb/third_party/re2/re2/prog.o duckdb/third_party/re2/re2/re2.o duckdb/third_party/re2/re2/regexp.o duckdb/third_party/re2/re2/set.o duckdb/third_party/re2/re2/simplify.o duckdb/third_party/re2/re2/stringpiece.o duckdb/third_party/re2/re2/tostring.o duckdb/third_party/re2/re2/unicode_casefold.o duckdb/third_party/re2/re2/unicode_groups.o duckdb/third_party/re2/util/rune.o duckdb/third_party/re2/util/strutil.o duckdb/third_party/hyperloglog/hyperloglog.o duckdb/third_party/hyperloglog/sds.o duckdb/third_party/fastpforlib/bitpacking.o duckdb/third_party/utf8proc/utf8proc.o duckdb/third_party/utf8proc/utf8proc_wrapper.o duckdb/third_party/libpg_query/pg_functions.o duckdb/third_party/libpg_query/postgres_parser.o duckdb/third_party/libpg_query/src_backend_nodes_list.o duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.o duckdb/third_party/libpg_query/src_backend_nodes_value.o duckdb/third_party/libpg_query/src_backend_parser_gram.o duckdb/third_party/libpg_query/src_backend_parser_parser.o duckdb/third_party/libpg_query/src_backend_parser_scan.o duckdb/third_party/libpg_query/src_backend_parser_scansup.o duckdb/third_party/libpg_query/src_common_keywords.o duckdb/third_party/mbedtls/library/asn1parse.o duckdb/third_party/mbedtls/library/base64.o duckdb/third_party/mbedtls/library/bignum.o duckdb/third_party/mbedtls/library/constant_time.o duckdb/third_party/mbedtls/library/md.o duckdb/third_party/mbedtls/library/oid.o duckdb/third_party/mbedtls/library/pem.o duckdb/third_party/mbedtls/library/pk.o duckdb/third_party/mbedtls/library/pk_wrap.o duckdb/third_party/mbedtls/library/pkparse.o duckdb/third_party/mbedtls/library/platform_util.o duckdb/third_party/mbedtls/library/rsa.o duckdb/third_party/mbedtls/library/rsa_alt_helpers.o duckdb/third_party/mbedtls/library/sha1.o duckdb/third_party/mbedtls/library/sha256.o duckdb/third_party/mbedtls/library/sha512.o duckdb/third_party/mbedtls/mbedtls_wrapper.o duckdb/extension/parquet/parquet_extension.o duckdb/extension/parquet/column_writer.o duckdb/extension/parquet/serialize_parquet.o duckdb/extension/parquet/parquet_reader.o duckdb/extension/parquet/parquet_timestamp.o duckdb/extension/parquet/parquet_writer.o duckdb/extension/parquet/column_reader.o duckdb/extension/parquet/parquet_statistics.o duckdb/extension/parquet/parquet_metadata.o duckdb/extension/parquet/zstd_file_system.o duckdb/third_party/parquet/parquet_constants.o duckdb/third_party/parquet/parquet_types.o duckdb/third_party/thrift/thrift/protocol/TProtocol.o duckdb/third_party/thrift/thrift/transport/TTransportException.o duckdb/third_party/thrift/thrift/transport/TBufferTransports.o duckdb/third_party/snappy/snappy.o duckdb/third_party/snappy/snappy-sinksource.o duckdb/third_party/zstd/decompress/zstd_ddict.o duckdb/third_party/zstd/decompress/huf_decompress.o duckdb/third_party/zstd/decompress/zstd_decompress.o duckdb/third_party/zstd/decompress/zstd_decompress_block.o duckdb/third_party/zstd/common/entropy_common.o duckdb/third_party/zstd/common/fse_decompress.o duckdb/third_party/zstd/common/zstd_common.o duckdb/third_party/zstd/common/error_private.o duckdb/third_party/zstd/common/xxhash.o duckdb/third_party/zstd/compress/fse_compress.o duckdb/third_party/zstd/compress/hist.o duckdb/third_party/zstd/compress/huf_compress.o duckdb/third_party/zstd/compress/zstd_compress.o duckdb/third_party/zstd/compress/zstd_compress_literals.o duckdb/third_party/zstd/compress/zstd_compress_sequences.o duckdb/third_party/zstd/compress/zstd_compress_superblock.o duckdb/third_party/zstd/compress/zstd_double_fast.o duckdb/third_party/zstd/compress/zstd_fast.o duckdb/third_party/zstd/compress/zstd_lazy.o duckdb/third_party/zstd/compress/zstd_ldm.o duckdb/third_party/zstd/compress/zstd_opt.o +SOURCES=duckdb/ub_src_catalog.o duckdb/ub_src_catalog_catalog_entry.o duckdb/ub_src_catalog_default.o duckdb/ub_src_common_adbc.o duckdb/ub_src_common_adbc_nanoarrow.o duckdb/ub_src_common.o duckdb/ub_src_common_arrow_appender.o duckdb/ub_src_common_arrow.o duckdb/ub_src_common_crypto.o duckdb/ub_src_common_enums.o duckdb/ub_src_common_operator.o duckdb/ub_src_common_progress_bar.o duckdb/ub_src_common_row_operations.o duckdb/ub_src_common_serializer.o duckdb/ub_src_common_sort.o duckdb/ub_src_common_types.o duckdb/ub_src_common_types_column.o duckdb/ub_src_common_types_row.o duckdb/ub_src_common_value_operations.o duckdb/src/common/vector_operations/boolean_operators.o duckdb/src/common/vector_operations/comparison_operators.o duckdb/src/common/vector_operations/generators.o duckdb/src/common/vector_operations/is_distinct_from.o duckdb/src/common/vector_operations/null_operations.o duckdb/src/common/vector_operations/numeric_inplace_operators.o duckdb/src/common/vector_operations/vector_cast.o duckdb/src/common/vector_operations/vector_copy.o duckdb/src/common/vector_operations/vector_hash.o duckdb/src/common/vector_operations/vector_storage.o duckdb/ub_src_core_functions_aggregate_algebraic.o duckdb/ub_src_core_functions_aggregate_distributive.o duckdb/ub_src_core_functions_aggregate_holistic.o duckdb/ub_src_core_functions_aggregate_nested.o duckdb/ub_src_core_functions_aggregate_regression.o duckdb/ub_src_core_functions.o duckdb/ub_src_core_functions_scalar_bit.o duckdb/ub_src_core_functions_scalar_blob.o duckdb/ub_src_core_functions_scalar_date.o duckdb/ub_src_core_functions_scalar_debug.o duckdb/ub_src_core_functions_scalar_enum.o duckdb/ub_src_core_functions_scalar_generic.o duckdb/ub_src_core_functions_scalar_list.o duckdb/ub_src_core_functions_scalar_map.o duckdb/ub_src_core_functions_scalar_math.o duckdb/ub_src_core_functions_scalar_operators.o duckdb/ub_src_core_functions_scalar_random.o duckdb/ub_src_core_functions_scalar_string.o duckdb/ub_src_core_functions_scalar_struct.o duckdb/ub_src_core_functions_scalar_union.o duckdb/ub_src_execution.o duckdb/ub_src_execution_expression_executor.o duckdb/ub_src_execution_index_art.o duckdb/ub_src_execution_nested_loop_join.o duckdb/ub_src_execution_operator_aggregate.o duckdb/ub_src_execution_operator_csv_scanner.o duckdb/ub_src_execution_operator_csv_scanner_sniffer.o duckdb/ub_src_execution_operator_filter.o duckdb/ub_src_execution_operator_helper.o duckdb/ub_src_execution_operator_join.o duckdb/ub_src_execution_operator_order.o duckdb/ub_src_execution_operator_persistent.o duckdb/ub_src_execution_operator_projection.o duckdb/ub_src_execution_operator_scan.o duckdb/ub_src_execution_operator_schema.o duckdb/ub_src_execution_operator_set.o duckdb/ub_src_execution_physical_plan.o duckdb/ub_src_function_aggregate_distributive.o duckdb/ub_src_function_aggregate.o duckdb/ub_src_function.o duckdb/ub_src_function_cast.o duckdb/ub_src_function_pragma.o duckdb/ub_src_function_scalar_compressed_materialization.o duckdb/ub_src_function_scalar.o duckdb/ub_src_function_scalar_generic.o duckdb/ub_src_function_scalar_list.o duckdb/ub_src_function_scalar_operators.o duckdb/ub_src_function_scalar_sequence.o duckdb/ub_src_function_scalar_string.o duckdb/ub_src_function_scalar_string_regexp.o duckdb/ub_src_function_scalar_struct.o duckdb/ub_src_function_scalar_system.o duckdb/ub_src_function_table_arrow.o duckdb/ub_src_function_table.o duckdb/ub_src_function_table_system.o duckdb/ub_src_function_table_version.o duckdb/ub_src_main.o duckdb/ub_src_main_capi.o duckdb/ub_src_main_capi_cast.o duckdb/ub_src_main_chunk_scan_state.o duckdb/ub_src_main_extension.o duckdb/ub_src_main_relation.o duckdb/ub_src_main_settings.o duckdb/ub_src_optimizer.o duckdb/ub_src_optimizer_compressed_materialization.o duckdb/ub_src_optimizer_join_order.o duckdb/ub_src_optimizer_matcher.o duckdb/ub_src_optimizer_pullup.o duckdb/ub_src_optimizer_pushdown.o duckdb/ub_src_optimizer_rule.o duckdb/ub_src_optimizer_statistics_expression.o duckdb/ub_src_optimizer_statistics_operator.o duckdb/ub_src_parallel.o duckdb/ub_src_parser.o duckdb/ub_src_parser_constraints.o duckdb/ub_src_parser_expression.o duckdb/ub_src_parser_parsed_data.o duckdb/ub_src_parser_query_node.o duckdb/ub_src_parser_statement.o duckdb/ub_src_parser_tableref.o duckdb/ub_src_parser_transform_constraint.o duckdb/ub_src_parser_transform_expression.o duckdb/ub_src_parser_transform_helpers.o duckdb/ub_src_parser_transform_statement.o duckdb/ub_src_parser_transform_tableref.o duckdb/ub_src_planner.o duckdb/ub_src_planner_binder_expression.o duckdb/ub_src_planner_binder_query_node.o duckdb/ub_src_planner_binder_statement.o duckdb/ub_src_planner_binder_tableref.o duckdb/ub_src_planner_expression.o duckdb/ub_src_planner_expression_binder.o duckdb/ub_src_planner_filter.o duckdb/ub_src_planner_operator.o duckdb/ub_src_planner_parsed_data.o duckdb/ub_src_planner_subquery.o duckdb/ub_src_storage.o duckdb/ub_src_storage_buffer.o duckdb/ub_src_storage_checkpoint.o duckdb/ub_src_storage_compression.o duckdb/ub_src_storage_compression_chimp.o duckdb/ub_src_storage_metadata.o duckdb/ub_src_storage_serialization.o duckdb/ub_src_storage_statistics.o duckdb/ub_src_storage_table.o duckdb/ub_src_transaction.o duckdb/src/verification/copied_statement_verifier.o duckdb/src/verification/deserialized_statement_verifier.o duckdb/src/verification/deserialized_statement_verifier_v2.o duckdb/src/verification/external_statement_verifier.o duckdb/src/verification/no_operator_caching_verifier.o duckdb/src/verification/parsed_statement_verifier.o duckdb/src/verification/prepared_statement_verifier.o duckdb/src/verification/statement_verifier.o duckdb/src/verification/unoptimized_statement_verifier.o duckdb/third_party/fmt/format.o duckdb/third_party/fsst/fsst_avx512.o duckdb/third_party/fsst/libfsst.o duckdb/third_party/miniz/miniz.o duckdb/third_party/re2/re2/bitstate.o duckdb/third_party/re2/re2/compile.o duckdb/third_party/re2/re2/dfa.o duckdb/third_party/re2/re2/filtered_re2.o duckdb/third_party/re2/re2/mimics_pcre.o duckdb/third_party/re2/re2/nfa.o duckdb/third_party/re2/re2/onepass.o duckdb/third_party/re2/re2/parse.o duckdb/third_party/re2/re2/perl_groups.o duckdb/third_party/re2/re2/prefilter.o duckdb/third_party/re2/re2/prefilter_tree.o duckdb/third_party/re2/re2/prog.o duckdb/third_party/re2/re2/re2.o duckdb/third_party/re2/re2/regexp.o duckdb/third_party/re2/re2/set.o duckdb/third_party/re2/re2/simplify.o duckdb/third_party/re2/re2/stringpiece.o duckdb/third_party/re2/re2/tostring.o duckdb/third_party/re2/re2/unicode_casefold.o duckdb/third_party/re2/re2/unicode_groups.o duckdb/third_party/re2/util/rune.o duckdb/third_party/re2/util/strutil.o duckdb/third_party/hyperloglog/hyperloglog.o duckdb/third_party/hyperloglog/sds.o duckdb/third_party/fastpforlib/bitpacking.o duckdb/third_party/utf8proc/utf8proc.o duckdb/third_party/utf8proc/utf8proc_wrapper.o duckdb/third_party/libpg_query/pg_functions.o duckdb/third_party/libpg_query/postgres_parser.o duckdb/third_party/libpg_query/src_backend_nodes_list.o duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.o duckdb/third_party/libpg_query/src_backend_nodes_value.o duckdb/third_party/libpg_query/src_backend_parser_gram.o duckdb/third_party/libpg_query/src_backend_parser_parser.o duckdb/third_party/libpg_query/src_backend_parser_scan.o duckdb/third_party/libpg_query/src_backend_parser_scansup.o duckdb/third_party/libpg_query/src_common_keywords.o duckdb/third_party/mbedtls/library/asn1parse.o duckdb/third_party/mbedtls/library/base64.o duckdb/third_party/mbedtls/library/bignum.o duckdb/third_party/mbedtls/library/constant_time.o duckdb/third_party/mbedtls/library/md.o duckdb/third_party/mbedtls/library/oid.o duckdb/third_party/mbedtls/library/pem.o duckdb/third_party/mbedtls/library/pk.o duckdb/third_party/mbedtls/library/pk_wrap.o duckdb/third_party/mbedtls/library/pkparse.o duckdb/third_party/mbedtls/library/platform_util.o duckdb/third_party/mbedtls/library/rsa.o duckdb/third_party/mbedtls/library/rsa_alt_helpers.o duckdb/third_party/mbedtls/library/sha1.o duckdb/third_party/mbedtls/library/sha256.o duckdb/third_party/mbedtls/library/sha512.o duckdb/third_party/mbedtls/mbedtls_wrapper.o