#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__SSSE3__) && !defined(MEMORY_SANITIZER) #include #endif namespace DB { namespace ErrorCodes { extern const int LOGICAL_ERROR; } using Sizes = std::vector; /// When packing the values of nullable columns at a given row, we have to /// store the fact that these values are nullable or not. This is achieved /// by encoding this information as a bitmap. Let S be the size in bytes of /// a packed values binary blob and T the number of bytes we may place into /// this blob, the size that the bitmap shall occupy in the blob is equal to: /// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for /// each value of S, the corresponding value of T, and the bitmap size: /// /// 32,28,4 /// 16,14,2 /// 8,7,1 /// 4,3,1 /// 2,1,1 /// namespace { template constexpr auto getBitmapSize() { return (sizeof(T) == 32) ? 4 : (sizeof(T) == 16) ? 2 : ((sizeof(T) == 8) ? 1 : ((sizeof(T) == 4) ? 1 : ((sizeof(T) == 2) ? 1 : 0))); } } template void fillFixedBatch(size_t num_rows, const T * source, T * dest) { for (size_t i = 0; i < num_rows; ++i) { *dest = *source; ++source; dest += step; } } /// Move keys of size T into binary blob, starting from offset. /// It is assumed that offset is aligned to sizeof(T). /// Example: sizeof(key) = 16, sizeof(T) = 4, offset = 8 /// out[0] : [--------****----] /// out[1] : [--------****----] /// ... template void fillFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, PaddedPODArray & out, size_t & offset) { for (size_t i = 0; i < keys_size; ++i) { if (key_sizes[i] == sizeof(T)) { const auto * column = key_columns[i]; size_t num_rows = column->size(); out.resize_fill(num_rows); /// Note: here we violate strict aliasing. /// It should be ok as log as we do not reffer to any value from `out` before filling. const char * source = static_cast(column)->getRawDataBegin(); T * dest = reinterpret_cast(reinterpret_cast(out.data()) + offset); fillFixedBatch(num_rows, reinterpret_cast(source), dest); offset += sizeof(T); } } } /// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the /// binary blob. Keys are placed starting from the longest one. template void packFixedBatch(size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, PaddedPODArray & out) { size_t offset = 0; fillFixedBatch(keys_size, key_columns, key_sizes, out, offset); fillFixedBatch(keys_size, key_columns, key_sizes, out, offset); fillFixedBatch(keys_size, key_columns, key_sizes, out, offset); fillFixedBatch(keys_size, key_columns, key_sizes, out, offset); fillFixedBatch(keys_size, key_columns, key_sizes, out, offset); } template using KeysNullMap = std::array()>; /// Pack into a binary blob of type T a set of fixed-size keys. Granted that all the keys fit into the /// binary blob, they are disposed in it consecutively. template static inline T ALWAYS_INLINE packFixed( size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, const ColumnRawPtrs * low_cardinality_positions [[maybe_unused]] = nullptr, const Sizes * low_cardinality_sizes [[maybe_unused]] = nullptr) { T key{}; char * bytes = reinterpret_cast(&key); size_t offset = 0; for (size_t j = 0; j < keys_size; ++j) { size_t index = i; const IColumn * column = key_columns[j]; if constexpr (has_low_cardinality) { if (const IColumn * positions = (*low_cardinality_positions)[j]) { switch ((*low_cardinality_sizes)[j]) { case sizeof(UInt8): index = assert_cast(positions)->getElement(i); break; case sizeof(UInt16): index = assert_cast(positions)->getElement(i); break; case sizeof(UInt32): index = assert_cast(positions)->getElement(i); break; case sizeof(UInt64): index = assert_cast(positions)->getElement(i); break; default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); } } } switch (key_sizes[j]) { case 1: { memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index, 1); offset += 1; } break; case 2: if constexpr (sizeof(T) >= 2) /// To avoid warning about memcpy exceeding object size. { memcpy(bytes + offset, static_cast(column)->getRawDataBegin<2>() + index * 2, 2); offset += 2; } break; case 4: if constexpr (sizeof(T) >= 4) { memcpy(bytes + offset, static_cast(column)->getRawDataBegin<4>() + index * 4, 4); offset += 4; } break; case 8: if constexpr (sizeof(T) >= 8) { memcpy(bytes + offset, static_cast(column)->getRawDataBegin<8>() + index * 8, 8); offset += 8; } break; default: memcpy(bytes + offset, static_cast(column)->getRawDataBegin<1>() + index * key_sizes[j], key_sizes[j]); offset += key_sizes[j]; } } return key; } /// Similar as above but supports nullable values. template static inline T ALWAYS_INLINE packFixed( size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, const KeysNullMap & bitmap) { union { T key; char bytes[sizeof(key)] = {}; }; size_t offset = 0; static constexpr auto bitmap_size = std::tuple_size>::value; static constexpr bool has_bitmap = bitmap_size > 0; if (has_bitmap) { memcpy(bytes + offset, bitmap.data(), bitmap_size * sizeof(UInt8)); offset += bitmap_size; } for (size_t j = 0; j < keys_size; ++j) { bool is_null; if (!has_bitmap) is_null = false; else { size_t bucket = j / 8; size_t off = j % 8; is_null = ((bitmap[bucket] >> off) & 1) == 1; } if (is_null) continue; switch (key_sizes[j]) { case 1: memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i, 1); offset += 1; break; case 2: memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<2>() + i * 2, 2); offset += 2; break; case 4: memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<4>() + i * 4, 4); offset += 4; break; case 8: memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<8>() + i * 8, 8); offset += 8; break; default: memcpy(bytes + offset, static_cast(key_columns[j])->getRawDataBegin<1>() + i * key_sizes[j], key_sizes[j]); offset += key_sizes[j]; } } return key; } /// Hash a set of keys into a UInt128 value. static inline UInt128 ALWAYS_INLINE hash128( size_t i, size_t keys_size, const ColumnRawPtrs & key_columns) { UInt128 key; SipHash hash; for (size_t j = 0; j < keys_size; ++j) key_columns[j]->updateHashWithValue(i, hash); hash.get128(key); return key; } /// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. static inline StringRef * ALWAYS_INLINE placeKeysInPool( size_t keys_size, StringRefs & keys, Arena & pool) { for (size_t j = 0; j < keys_size; ++j) { char * place = pool.alloc(keys[j].size); memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size); keys[j].data = place; } /// Place the StringRefs on the newly copied keys in the pool. char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef)); memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef)); return reinterpret_cast(res); } /** Serialize keys into a continuous chunk of memory. */ static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool) { const char * begin = nullptr; size_t sum_size = 0; for (size_t j = 0; j < keys_size; ++j) sum_size += key_columns[j]->serializeValueIntoArena(i, pool, begin).size; return {begin, sum_size}; } /** Pack elements with shuffle instruction. * See the explanation in ColumnsHashing.h */ #if defined(__SSSE3__) && !defined(MEMORY_SANITIZER) template static T inline packFixedShuffle( const char * __restrict * __restrict srcs, size_t num_srcs, const size_t * __restrict elem_sizes, size_t idx, const uint8_t * __restrict masks) { assert(num_srcs > 0); __m128i res = _mm_shuffle_epi8( _mm_loadu_si128(reinterpret_cast(srcs[0] + elem_sizes[0] * idx)), _mm_loadu_si128(reinterpret_cast(masks))); for (size_t i = 1; i < num_srcs; ++i) { res = _mm_xor_si128(res, _mm_shuffle_epi8( _mm_loadu_si128(reinterpret_cast(srcs[i] + elem_sizes[i] * idx)), _mm_loadu_si128(reinterpret_cast(&masks[i * sizeof(T)])))); } T out; __builtin_memcpy(&out, &res, sizeof(T)); return out; } #endif }