Mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-19 22:22:00 +00:00

Merge remote-tracking branch 'upstream/master' into fix25

This commit is contained in: commit ad98560d78
@@ -1,5 +1,5 @@
 if (OS_LINUX AND NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE)
-    option (ENABLE_MIMALLOC "Set to FALSE to disable usage of mimalloc for internal ClickHouse caches" ${NOT_UNBUNDLED})
+    option (ENABLE_MIMALLOC "Set to FALSE to disable usage of mimalloc for internal ClickHouse caches" FALSE)
 endif ()

 if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include/mimalloc.h")
@@ -8,6 +8,8 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include/mimalloc.h")
 endif ()

 if (ENABLE_MIMALLOC)
+    message (FATAL_ERROR "Mimalloc is not production ready. (Disable with cmake -D ENABLE_MIMALLOC=0). If you want to use mimalloc, you must manually remove this message.")
+
     set (MIMALLOC_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/mimalloc/include)
     set (USE_MIMALLOC 1)
     set (MIMALLOC_LIBRARY mimalloc-static)
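For context, a minimal sketch of how a consumer might select the allocator behind the USE_MIMALLOC flag this file defines. Only MiAllocator and USE_MIMALLOC come from this commit; the fallback header and alias names are assumptions for illustration:

// Hypothetical consumer-side selection (illustrative only).
#if USE_MIMALLOC
    #include <Common/MiAllocator.h>
    using CacheMemoryAllocator = DB::MiAllocator;   // mimalloc-backed wrapper from this diff
#else
    #include <Common/Allocator.h>                   // default allocator header (assumed)
    using CacheMemoryAllocator = Allocator<false>;  // default ClickHouse allocator (assumed)
#endif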
@@ -5,15 +5,33 @@
 #include "MiAllocator.h"
 #include <mimalloc.h>
+
+#include <Common/Exception.h>
+#include <Common/formatReadable.h>
+#include <IO/WriteHelpers.h>

 namespace DB
 {
+namespace ErrorCodes
+{
+    extern const int CANNOT_ALLOCATE_MEMORY;
+}

 void * MiAllocator::alloc(size_t size, size_t alignment)
 {
+    void * ptr;
     if (alignment == 0)
-        return mi_malloc(size);
+    {
+        ptr = mi_malloc(size);
+        if (!ptr)
+            DB::throwFromErrno("MiAllocator: Cannot allocate in mimalloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+    }
     else
-        return mi_malloc_aligned(size, alignment);
+    {
+        ptr = mi_malloc_aligned(size, alignment);
+        if (!ptr)
+            DB::throwFromErrno("MiAllocator: Cannot allocate in mimalloc (mi_malloc_aligned) " + formatReadableSizeWithBinarySuffix(size) + " with alignment " + toString(alignment) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+    }
+    return ptr;
 }

 void MiAllocator::free(void * buf, size_t)
@@ -32,10 +50,21 @@ void * MiAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment)
         return nullptr;
     }

+    void * ptr;
+
     if (alignment == 0)
-        return mi_realloc(old_ptr, alignment);
-
-    return mi_realloc_aligned(old_ptr, new_size, alignment);
+    {
+        ptr = mi_realloc(old_ptr, new_size);
+        if (!ptr)
+            DB::throwFromErrno("MiAllocator: Cannot reallocate in mimalloc " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+    }
+    else
+    {
+        ptr = mi_realloc_aligned(old_ptr, new_size, alignment);
+        if (!ptr)
+            DB::throwFromErrno("MiAllocator: Cannot reallocate in mimalloc (mi_realloc_aligned) " + formatReadableSizeWithBinarySuffix(new_size) + " with alignment " + toString(alignment) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+    }
+    return ptr;
 }

 }
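A minimal usage sketch of this interface, assuming the header declares these as static methods with the signatures visible above (the realloc call passes old_size, new_size, alignment; all of this is inferred, not taken verbatim from the header):

#include <Common/MiAllocator.h>

void example()
{
    void * buf = DB::MiAllocator::alloc(1024, 64);        // 64-byte aligned block
    buf = DB::MiAllocator::realloc(buf, 1024, 2048, 64);  // grow, keeping alignment (assumed argument order)
    DB::MiAllocator::free(buf, 2048);
}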
@@ -75,3 +75,6 @@ target_link_libraries (cow_compositions PRIVATE clickhouse_common_io)

 add_executable (stopwatch stopwatch.cpp)
 target_link_libraries (stopwatch PRIVATE clickhouse_common_io)
+
+add_executable (mi_malloc_test mi_malloc_test.cpp)
+target_link_libraries (mi_malloc_test PRIVATE clickhouse_common_io)
dbms/src/Common/tests/mi_malloc_test.cpp (new file, 118 lines)
@@ -0,0 +1,118 @@
/** In addition to ClickHouse (Apache 2) license, this file can be also used under MIT license:

MIT License

Copyright (c) 2019 Yandex LLC, Alexey Milovidov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/

#include <map>
#include <vector>
#include <cstdint>
#include <random>
#include <stdexcept>
#include <iostream>

#include <Common/config.h>

//#undef USE_MIMALLOC
//#define USE_MIMALLOC 0

#if USE_MIMALLOC

#include <mimalloc.h>
#define malloc mi_malloc
#define free mi_free

#else

#include <stdlib.h>

#endif


size_t total_size{0};

struct Allocation
{
    void * ptr = nullptr;
    size_t size = 0;

    Allocation() {}

    Allocation(size_t size)
        : size(size)
    {
        ptr = malloc(size);
        if (!ptr)
            throw std::runtime_error("Cannot allocate memory");
        total_size += size;
    }

    ~Allocation()
    {
        if (ptr)
        {
            free(ptr);
            total_size -= size;
        }
        ptr = nullptr;
    }

    Allocation(const Allocation &) = delete;

    Allocation(Allocation && rhs)
    {
        ptr = rhs.ptr;
        size = rhs.size;
        rhs.ptr = nullptr;
        rhs.size = 0;
    }
};


int main(int, char **)
{
    std::vector<Allocation> allocations;

    constexpr size_t limit = 100000000;
    constexpr size_t min_alloc_size = 65536;
    constexpr size_t max_alloc_size = 10000000;

    std::mt19937 rng;
    auto distribution = std::uniform_int_distribution(min_alloc_size, max_alloc_size);

    size_t total_allocations = 0;

    while (true)
    {
        size_t size = distribution(rng);

        while (total_size + size > limit)
            allocations.pop_back();

        allocations.emplace_back(size);

        ++total_allocations;
        if (total_allocations % (1ULL << 20) == 0)
            std::cerr << "Total allocations: " << total_allocations << "\n";
    }
}
@@ -12,6 +12,7 @@
 #include <algorithm>
 #include <cstdlib>
 #include <type_traits>
+#include <limits>

 namespace DB
 {
@@ -24,28 +25,23 @@ extern const int CANNOT_DECOMPRESS;

 namespace
 {
-UInt32 getDeltaTypeByteSize(UInt8 data_bytes_size)
+Int64 getMaxValueForByteSize(UInt8 byte_size)
 {
-    // both delta and double delta can be twice the size of the data item, but not less than 32 bits and not more than 64.
-    return std::min(64/8, std::max(32/8, data_bytes_size * 2));
-}
-
-UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
-{
-    const UInt8 items_count_size = 4;
-
-    return items_count_size + data_bytes_size + getDeltaTypeByteSize(data_bytes_size);
-}
-
-UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
-{
-    const UInt32 items_count = uncompressed_size / data_bytes_size;
-
-    // 11111 + max 64 bits of double delta.
-    const UInt32 max_item_size_bits = 5 + getDeltaTypeByteSize(data_bytes_size) * 8;
-
-    // + 8 is to round up to next byte.
-    return (items_count * max_item_size_bits + 8) / 8;
+    switch (byte_size)
+    {
+    case sizeof(UInt8):
+        return std::numeric_limits<Int8>::max();
+    case sizeof(UInt16):
+        return std::numeric_limits<Int16>::max();
+    case sizeof(UInt32):
+        return std::numeric_limits<Int32>::max();
+    case sizeof(UInt64):
+        return std::numeric_limits<Int64>::max();
+    default:
+        assert(false && "only 1, 2, 4 and 8 data sizes are supported");
+    }
+    __builtin_unreachable();
 }

 struct WriteSpec
@@ -55,8 +51,10 @@ struct WriteSpec
     const UInt8 data_bits;
 };

+const std::array<UInt8, 5> DELTA_SIZES{7, 9, 12, 32, 64};
+
 template <typename T>
-WriteSpec getWriteSpec(const T & value)
+WriteSpec getDeltaWriteSpec(const T & value)
 {
     if (value > -63 && value < 64)
     {
@@ -80,27 +78,60 @@ WriteSpec getWriteSpec(const T & value)
     }
 }

-template <typename T, typename DeltaType>
+WriteSpec getDeltaMaxWriteSpecByteSize(UInt8 data_bytes_size)
+{
+    return getDeltaWriteSpec(getMaxValueForByteSize(data_bytes_size));
+}
+
+UInt32 getCompressedHeaderSize(UInt8 data_bytes_size)
+{
+    const UInt8 items_count_size = 4;
+    const UInt8 first_delta_bytes_size = data_bytes_size;
+
+    return items_count_size + data_bytes_size + first_delta_bytes_size;
+}
+
+UInt32 getCompressedDataSize(UInt8 data_bytes_size, UInt32 uncompressed_size)
+{
+    const UInt32 items_count = uncompressed_size / data_bytes_size;
+    const auto double_delta_write_spec = getDeltaMaxWriteSpecByteSize(data_bytes_size);
+
+    const UInt32 max_item_size_bits = double_delta_write_spec.prefix_bits + double_delta_write_spec.data_bits;
+
+    // + 7 is to round up to the next byte.
+    auto result = (items_count * max_item_size_bits + 7) / 8;
+
+    return result;
+}
+
+template <typename ValueType>
 UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
 {
-    static_assert(std::is_unsigned_v<T> && std::is_signed_v<DeltaType>, "T must be unsigned, while DeltaType must be signed integer type.");
-    using UnsignedDeltaType = typename std::make_unsigned<DeltaType>::type;
-
-    if (source_size % sizeof(T) != 0)
-        throw Exception("Cannot compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);
+    // Since only unsigned integers have guaranteed two's-complement overflow handling, we do the math on unsigned types.
+    // To simplify and bulletproof the code, we enforce that ValueType is unsigned too.
+    static_assert(std::is_unsigned_v<ValueType>, "ValueType must be unsigned.");
+    using UnsignedDeltaType = ValueType;
+
+    // We use a signed delta type to turn huge unsigned values into smaller signed ones:
+    // ffffffff => -1
+    using SignedDeltaType = typename std::make_signed<UnsignedDeltaType>::type;
+
+    if (source_size % sizeof(ValueType) != 0)
+        throw Exception("Cannot compress, data size " + toString(source_size)
+            + " is not aligned to " + toString(sizeof(ValueType)), ErrorCodes::CANNOT_COMPRESS);
     const char * source_end = source + source_size;

-    const UInt32 items_count = source_size / sizeof(T);
+    const UInt32 items_count = source_size / sizeof(ValueType);
     unalignedStore<UInt32>(dest, items_count);
     dest += sizeof(items_count);

-    T prev_value{};
-    DeltaType prev_delta{};
+    ValueType prev_value{};
+    UnsignedDeltaType prev_delta{};

     if (source < source_end)
     {
-        prev_value = unalignedLoad<T>(source);
-        unalignedStore<T>(dest, prev_value);
+        prev_value = unalignedLoad<ValueType>(source);
+        unalignedStore<ValueType>(dest, prev_value);

         source += sizeof(prev_value);
         dest += sizeof(prev_value);
@@ -108,24 +139,26 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)

     if (source < source_end)
     {
-        const T curr_value = unalignedLoad<T>(source);
-        prev_delta = static_cast<DeltaType>(curr_value - prev_value);
-        unalignedStore<DeltaType>(dest, prev_delta);
+        const ValueType curr_value = unalignedLoad<ValueType>(source);
+
+        prev_delta = curr_value - prev_value;
+        unalignedStore<UnsignedDeltaType>(dest, prev_delta);

         source += sizeof(curr_value);
         dest += sizeof(prev_delta);
         prev_value = curr_value;
     }

-    WriteBuffer buffer(dest, getCompressedDataSize(sizeof(T), source_size - sizeof(T)*2));
+    WriteBuffer buffer(dest, getCompressedDataSize(sizeof(ValueType), source_size - sizeof(ValueType)*2));
     BitWriter writer(buffer);

-    for (; source < source_end; source += sizeof(T))
+    int item = 2;
+    for (; source < source_end; source += sizeof(ValueType), ++item)
     {
-        const T curr_value = unalignedLoad<T>(source);
+        const ValueType curr_value = unalignedLoad<ValueType>(source);

-        const DeltaType delta = static_cast<DeltaType>(curr_value - prev_value);
-        const DeltaType double_delta = delta - prev_delta;
+        const UnsignedDeltaType delta = curr_value - prev_value;
+        const UnsignedDeltaType double_delta = delta - prev_delta;

         prev_delta = delta;
         prev_value = curr_value;
@@ -136,9 +169,11 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
         }
         else
         {
-            const auto sign = std::signbit(double_delta);
-            const auto abs_value = static_cast<UnsignedDeltaType>(std::abs(double_delta));
-            const auto write_spec = getWriteSpec(double_delta);
+            const SignedDeltaType signed_dd = static_cast<SignedDeltaType>(double_delta);
+            const auto sign = std::signbit(signed_dd);
+            // -1 shrinks dd down to fit into the number of bits, and there can't be a 0, so it is OK.
+            const auto abs_value = static_cast<UnsignedDeltaType>(std::abs(signed_dd) - 1);
+            const auto write_spec = getDeltaWriteSpec(signed_dd);

             writer.writeBits(write_spec.prefix_bits, write_spec.prefix);
             writer.writeBits(1, sign);
@@ -151,22 +186,25 @@ UInt32 compressDataForType(const char * source, UInt32 source_size, char * dest)
     return sizeof(items_count) + sizeof(prev_value) + sizeof(prev_delta) + buffer.count();
 }

-template <typename T, typename DeltaType>
+template <typename ValueType>
 void decompressDataForType(const char * source, UInt32 source_size, char * dest)
 {
-    static_assert(std::is_unsigned_v<T> && std::is_signed_v<DeltaType>, "T must be unsigned, while DeltaType must be signed integer type.");
+    static_assert(std::is_unsigned_v<ValueType>, "ValueType must be unsigned.");
+    using UnsignedDeltaType = ValueType;
+    using SignedDeltaType = typename std::make_signed<UnsignedDeltaType>::type;

     const char * source_end = source + source_size;

     const UInt32 items_count = unalignedLoad<UInt32>(source);
     source += sizeof(items_count);

-    T prev_value{};
-    DeltaType prev_delta{};
+    ValueType prev_value{};
+    UnsignedDeltaType prev_delta{};

     if (source < source_end)
     {
-        prev_value = unalignedLoad<T>(source);
-        unalignedStore<T>(dest, prev_value);
+        prev_value = unalignedLoad<ValueType>(source);
+        unalignedStore<ValueType>(dest, prev_value);

         source += sizeof(prev_value);
         dest += sizeof(prev_value);
@@ -174,9 +212,9 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)

     if (source < source_end)
     {
-        prev_delta = unalignedLoad<DeltaType>(source);
-        prev_value = prev_value + static_cast<T>(prev_delta);
-        unalignedStore<T>(dest, prev_value);
+        prev_delta = unalignedLoad<UnsignedDeltaType>(source);
+        prev_value = prev_value + static_cast<ValueType>(prev_delta);
+        unalignedStore<ValueType>(dest, prev_value);

         source += sizeof(prev_delta);
         dest += sizeof(prev_value);
@@ -189,32 +227,35 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest)
     // we have to keep track of items to avoid reading more than there is.
     for (UInt32 items_read = 2; items_read < items_count && !reader.eof(); ++items_read)
     {
-        DeltaType double_delta = 0;
+        UnsignedDeltaType double_delta = 0;
         if (reader.readBit() == 1)
         {
-            const UInt8 data_sizes[] = {6, 8, 11, 31, 63};
             UInt8 i = 0;
-            for (; i < sizeof(data_sizes) - 1; ++i)
+            for (; i < sizeof(DELTA_SIZES) - 1; ++i)
             {
                 const auto next_bit = reader.readBit();
                 if (next_bit == 0)
                 {
                     break;
                 }
             }

             const UInt8 sign = reader.readBit();
-            double_delta = static_cast<DeltaType>(reader.readBits(data_sizes[i]));
+            SignedDeltaType signed_dd = static_cast<SignedDeltaType>(reader.readBits(DELTA_SIZES[i] - 1) + 1);
             if (sign)
             {
-                double_delta *= -1;
+                signed_dd *= -1;
             }
+            double_delta = static_cast<UnsignedDeltaType>(signed_dd);
         }
         // else if the first bit is zero, no need to read more data.

-        const T curr_value = prev_value + static_cast<T>(prev_delta + double_delta);
-        unalignedStore<T>(dest, curr_value);
+        const UnsignedDeltaType delta = double_delta + prev_delta;
+        const ValueType curr_value = prev_value + delta;
+        unalignedStore<ValueType>(dest, curr_value);
         dest += sizeof(curr_value);

-        prev_delta = static_cast<DeltaType>(curr_value - prev_value);
+        prev_delta = curr_value - prev_value;
         prev_value = curr_value;
     }
 }
@@ -267,19 +308,20 @@ UInt32 CompressionCodecDoubleDelta::doCompressData(const char * source, UInt32 s
     memcpy(&dest[2], source, bytes_to_skip);
     size_t start_pos = 2 + bytes_to_skip;
     UInt32 compressed_size = 0;

     switch (data_bytes_size)
     {
     case 1:
-        compressed_size = compressDataForType<UInt8, Int16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
+        compressed_size = compressDataForType<UInt8>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
         break;
     case 2:
-        compressed_size = compressDataForType<UInt16, Int32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
+        compressed_size = compressDataForType<UInt16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
         break;
     case 4:
-        compressed_size = compressDataForType<UInt32, Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
+        compressed_size = compressDataForType<UInt32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
         break;
     case 8:
-        compressed_size = compressDataForType<UInt64, Int64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
+        compressed_size = compressDataForType<UInt64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
         break;
     }

@@ -296,16 +338,16 @@ void CompressionCodecDoubleDelta::doDecompressData(const char * source, UInt32 s
     switch (bytes_size)
     {
     case 1:
-        decompressDataForType<UInt8, Int16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
+        decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
         break;
     case 2:
-        decompressDataForType<UInt16, Int32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
+        decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
         break;
     case 4:
-        decompressDataForType<UInt32, Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
+        decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
         break;
     case 8:
-        decompressDataForType<UInt64, Int64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
+        decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
         break;
     }
 }
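To see why the codec stores the first value, then the first delta, then variable-width double deltas, here is a small standalone illustration of the arithmetic (independent of ClickHouse types; plain unsigned 64-bit math, as in the code above):

#include <cstdint>
#include <cstdio>

int main()
{
    const uint64_t values[] = {100, 110, 120, 130, 145};
    uint64_t prev_delta = values[1] - values[0];  // first value and first delta are stored verbatim

    for (int i = 2; i < 5; ++i)
    {
        const uint64_t delta = values[i] - values[i - 1];  // unsigned wraparound is well defined
        const uint64_t double_delta = delta - prev_delta;  // 0 for a constant stride
        std::printf("item %d: delta=%llu, double_delta(as signed)=%lld\n",
                    i,
                    static_cast<unsigned long long>(delta),
                    static_cast<long long>(static_cast<int64_t>(double_delta)));
        prev_delta = delta;
    }
}

For the constant-stride prefix (110, 120, 130) the double delta is 0 and the bit writer emits a single 0 bit per item; only the stride change at 145 costs a prefix code plus a few data bits.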
@@ -8,16 +8,16 @@

 #include <boost/format.hpp>

-#include <bitset>
 #include <cmath>
 #include <initializer_list>
 #include <iomanip>
-#include <memory>
-#include <vector>
-#include <typeinfo>
-#include <iterator>
-#include <optional>
-#include <iostream>
+#include <bitset>
+#include <iterator>
+#include <memory>
+#include <typeinfo>
+#include <vector>

 #include <string.h>

 #include <gtest/gtest.h>
@@ -114,36 +114,71 @@ template <typename T, typename ContainerLeft, typename ContainerRight>
             result = ::testing::AssertionFailure();
         }

-        result << "mismatching " << sizeof(T) << "-byte item #" << i
-            << "\nexpected: " << bin(left_value)
-            << "\ngot     : " << bin(right_value)
-            << std::endl;
-
-        if (++mismatching_items >= MAX_MISMATCHING_ITEMS)
+        if (++mismatching_items <= MAX_MISMATCHING_ITEMS)
         {
-            result << "..." << std::endl;
-            break;
+            result << "mismatching " << sizeof(T) << "-byte item #" << i
+                << "\nexpected: " << bin(left_value) << " (0x" << std::hex << left_value << ")"
+                << "\ngot     : " << bin(right_value) << " (0x" << std::hex << right_value << ")"
+                << std::endl;
+            if (mismatching_items == MAX_MISMATCHING_ITEMS)
+            {
+                result << "..." << std::endl;
+            }
         }
     }

+    if (mismatching_items > 0)
+    {
+        result << "\ntotal mismatching items:" << mismatching_items << " of " << size;
+    }

     return result;
 }

 struct CodecTestParam
 {
+    std::string type_name;
     std::vector<char> source_data;
     UInt8 data_byte_size;
+    double min_compression_ratio;
     std::string case_name;
+
+    // to allow setting ratio after building with complex builder functions.
+    CodecTestParam && setRatio(const double & ratio) &&
+    {
+        this->min_compression_ratio = ratio;
+        return std::move(*this);
+    }
 };

+CodecTestParam operator+(CodecTestParam && left, CodecTestParam && right)
+{
+    assert(left.type_name == right.type_name);
+    assert(left.data_byte_size == right.data_byte_size);
+
+    std::vector data(std::move(left.source_data));
+    data.insert(data.end(), right.source_data.begin(), right.source_data.end());
+
+    return CodecTestParam{
+        left.type_name,
+        std::move(data),
+        left.data_byte_size,
+        std::min(left.min_compression_ratio, right.min_compression_ratio),
+        left.case_name + " + " + right.case_name
+    };
+}
+
 std::ostream & operator<<(std::ostream & ostr, const CodecTestParam & param)
 {
     return ostr << "name: " << param.case_name
+        << "\ntype name:" << param.type_name
         << "\nbyte size: " << static_cast<UInt32>(param.data_byte_size)
         << "\ndata size: " << param.source_data.size();
 }

+// compression ratio < 1.0 means that codec output is smaller than input.
+const double DEFAULT_MIN_COMPRESSION_RATIO = 1.0;
+
 template <typename T, typename... Args>
 CodecTestParam makeParam(Args && ... args)
 {
@@ -157,11 +192,11 @@ CodecTestParam makeParam(Args && ... args)
         write_pos += sizeof(v);
     }

-    return CodecTestParam{std::move(data), sizeof(T),
-        (boost::format("%1% %2%") % (sizeof(T) * std::size(vals)) % " predefined values").str()};
+    return CodecTestParam{type_name<T>(), std::move(data), sizeof(T), DEFAULT_MIN_COMPRESSION_RATIO,
+        (boost::format("%1% values of %2%") % std::size(vals) % type_name<T>()).str()};
 }

-template <typename T, size_t Begin = 1, size_t End = 10000, typename Generator>
+template <typename T, size_t Begin = 1, size_t End = 10001, typename Generator>
 CodecTestParam generateParam(Generator gen, const char* gen_name)
 {
     static_assert (End >= Begin, "End must be not less than Begin");
@@ -176,8 +211,8 @@ CodecTestParam generateParam(Generator gen, const char* gen_name)
         write_pos += sizeof(v);
     }

-    return CodecTestParam{std::move(data), sizeof(T),
-        (boost::format("%1% from %2% (%3% => %4%)") % type_name<T>() % gen_name % Begin % End).str()};
+    return CodecTestParam{type_name<T>(), std::move(data), sizeof(T), DEFAULT_MIN_COMPRESSION_RATIO,
+        (boost::format("%1% values of %2% from %3%") % (End - Begin) % type_name<T>() % gen_name).str()};
 }

 void TestTranscoding(ICompressionCodec * codec, const CodecTestParam & param)
@@ -211,6 +246,13 @@ void TestTranscoding(ICompressionCodec * codec, const CodecTestParam & param)
     default:
         FAIL() << "Invalid data_byte_size: " << param.data_byte_size;
     }
+    const auto header_size = codec->getHeaderSize();
+    const auto compression_ratio = (encoded_size - header_size) / (source_data.size() * 1.0);
+
+    ASSERT_LE(compression_ratio, param.min_compression_ratio)
+        << "\n\tdecoded size: " << source_data.size()
+        << "\n\tencoded size: " << encoded_size
+        << " (no header: " << encoded_size - header_size << ")";
 }

 class CodecTest : public ::testing::TestWithParam<CodecTestParam>
@@ -225,20 +267,34 @@ public:

 TEST_P(CodecTest, DoubleDelta)
 {
-    const auto & param = GetParam();
+    auto param = GetParam();
     auto codec = std::make_unique<CompressionCodecDoubleDelta>(param.data_byte_size);
+    if (param.type_name == type_name<Float32>() || param.type_name == type_name<Float64>())
+    {
+        // dd doesn't work great with many cases of floats and may result in very poor compression rate.
+        param.min_compression_ratio *= 1.5;
+    }

     TestTranscoding(codec.get(), param);
 }

 TEST_P(CodecTest, Gorilla)
 {
-    const auto & param = GetParam();
+    auto param = GetParam();
     auto codec = std::make_unique<CompressionCodecGorilla>(param.data_byte_size);
+    if (param.type_name == type_name<UInt32>() || param.type_name == type_name<Int32>()
+        || param.type_name == type_name<UInt64>() || param.type_name == type_name<Int64>())
+    {
+        // gorilla doesn't work great with many cases of integers and may result in very poor compression rate.
+        param.min_compression_ratio *= 1.5;
+    }

     TestTranscoding(codec.get(), param);
 }

 // Here we use generators to produce test payload for codecs.
 // Generator is a callable that should produce output value of the same type as input value.

 auto SameValueGenerator = [](auto value)
 {
     return [=](auto i)
@@ -256,30 +312,44 @@ auto SequentialGenerator = [](auto stride = 1)
     };
 };

+// Generator that helps debugging output of other generators
+// by logging every output value alongside iteration index and input.
+//auto LoggingProxyGenerator = [](auto other_generator, const char * name, std::ostream & ostr, const int limit = std::numeric_limits<int>::max())
+//{
+//    ostr << "\n\nValues from " << name << ":\n";
+//    auto count = std::make_shared<int>(0);
+//    return [&, count](auto i)
+//    {
+//        using ValueType = decltype(i);
+//        const auto ret = static_cast<ValueType>(other_generator(i));
+//        if (++(*count) < limit)
+//        {
+//            ostr << "\t" << *count << " : " << i << " => " << ret << "\n";
+//        }
+
+//        return ret;
+//    };
+//};
+
 template <typename T>
 struct MonotonicGenerator
 {
     MonotonicGenerator(T stride = 1, size_t max_step = 10)
-        : prev_value{},
+        : prev_value(0),
           stride(stride),
           max_step(max_step)
     {}

     template <typename U>
-    U operator()(U i)
+    U operator()(U)
     {
-        if (!prev_value.has_value())
-        {
-            prev_value = i * stride;
-        }
-
-        const U result = *prev_value + static_cast<T>(stride * (rand() % max_step));
+        const U result = prev_value + static_cast<T>(stride * (rand() % max_step));

         prev_value = result;
         return result;
     }

-    std::optional<T> prev_value;
+    T prev_value;
     const T stride;
     const size_t max_step;
 };
@@ -296,25 +366,45 @@ auto MinMaxGenerator = [](auto i)
     }
 };

-auto RandomGenerator = [](auto i) {return static_cast<decltype(i)>(rand());};
+template <typename T>
+struct RandomGenerator
+{
+    RandomGenerator(T seed = 0, T value_cap = std::numeric_limits<T>::max())
+        : e(seed),
+          value_cap(value_cap)
+    {
+    }
+
+    template <typename U>
+    U operator()(U i)
+    {
+        return static_cast<decltype(i)>(distribution(e) % value_cap);
+    }
+
+private:
+    std::default_random_engine e;
+    std::uniform_int_distribution<T> distribution;
+    const T value_cap;
+};

 auto RandomishGenerator = [](auto i)
 {
     return static_cast<decltype(i)>(sin(static_cast<double>(i) * i) * i);
 };

-INSTANTIATE_TEST_CASE_P(Basic,
+// helper macro to produce human-friendly test case name
+#define G(generator) generator, #generator
+
+INSTANTIATE_TEST_CASE_P(Mixed,
     CodecTest,
     ::testing::Values(
-        makeParam<UInt32>(1, 2, 3, 4),
-        makeParam<UInt64>(1, 2, 3, 4),
-        makeParam<Float32>(1.1, 2.2, 3.3, 4.4),
-        makeParam<Float64>(1.1, 2.2, 3.3, 4.4)
+        generateParam<Int32, 1, 3>(G(MinMaxGenerator)) + generateParam<Int32, 1, 11>(G(SequentialGenerator(1))).setRatio(1),
+        generateParam<UInt32, 1, 3>(G(MinMaxGenerator)) + generateParam<UInt32, 1, 11>(G(SequentialGenerator(1))).setRatio(1),
+        generateParam<Int64, 1, 3>(G(MinMaxGenerator)) + generateParam<Int64, 1, 11>(G(SequentialGenerator(1))).setRatio(1),
+        generateParam<UInt64, 1, 3>(G(MinMaxGenerator)) + generateParam<UInt64, 1, 11>(G(SequentialGenerator(1))).setRatio(1)
     ),
 );

-#define G(generator) generator, #generator
-
 INSTANTIATE_TEST_CASE_P(Same,
     CodecTest,
     ::testing::Values(
@@ -354,18 +444,20 @@ INSTANTIATE_TEST_CASE_P(Monotonic,
 INSTANTIATE_TEST_CASE_P(Random,
     CodecTest,
     ::testing::Values(
-        generateParam<UInt32>(G(RandomGenerator)),
-        generateParam<UInt64>(G(RandomGenerator))
+        generateParam<UInt32>(G(RandomGenerator<UInt32>(0, 1000'000'000))).setRatio(1.2),
+        generateParam<UInt64>(G(RandomGenerator<UInt64>(0, 1000'000'000))).setRatio(1.1)
     ),
 );

-INSTANTIATE_TEST_CASE_P(RandomLike,
+INSTANTIATE_TEST_CASE_P(Randomish,
     CodecTest,
     ::testing::Values(
-        generateParam<Int32>(G(RandomishGenerator)),
-        generateParam<Int64>(G(RandomishGenerator)),
-        generateParam<Float32>(G(RandomishGenerator)),
-        generateParam<Float64>(G(RandomishGenerator))
+        generateParam<Int32>(G(RandomishGenerator)).setRatio(1.1),
+        generateParam<Int64>(G(RandomishGenerator)).setRatio(1.1),
+        generateParam<UInt32>(G(RandomishGenerator)).setRatio(1.1),
+        generateParam<UInt64>(G(RandomishGenerator)).setRatio(1.1),
+        generateParam<Float32>(G(RandomishGenerator)).setRatio(1.1),
+        generateParam<Float64>(G(RandomishGenerator)).setRatio(1.1)
    ),
 );
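For readability of the instantiations above, the G macro pairs a generator with its stringized spelling, which generateParam uses as gen_name in the test-case label:

// #define G(generator) generator, #generator
// so G(RandomGenerator<UInt32>(0, 1000'000'000)) expands to:
//     RandomGenerator<UInt32>(0, 1000'000'000), "RandomGenerator<UInt32>(0, 1000'000'000)"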
@@ -56,7 +56,7 @@

 #define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405

-#define DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO 54421
+#define DBMS_MIN_REVISION_WITH_CLIENT_WRITE_INFO 54420

 /// Version of ClickHouse TCP protocol. Set to git tag with latest protocol change.
 #define DBMS_TCP_PROTOCOL_VERSION 54226
@@ -1,6 +1,7 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionBinaryArithmetic.h>
-#include <boost/integer/common_factor.hpp>
+#include <numeric>


 namespace DB
 {
@@ -15,7 +16,7 @@ struct GCDImpl
     {
         throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
         throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
-        return boost::integer::gcd(
+        return std::gcd(
             typename NumberTraits::ToInteger<Result>::Type(a),
             typename NumberTraits::ToInteger<Result>::Type(b));
     }
@@ -1,6 +1,7 @@
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionBinaryArithmetic.h>
-#include <boost/integer/common_factor.hpp>
+#include <numeric>


 namespace DB
 {
@@ -15,7 +16,7 @@ struct LCMImpl
     {
         throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));
         throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<B>::Type(b), typename NumberTraits::ToInteger<A>::Type(a));
-        return boost::integer::lcm(
+        return std::lcm(
            typename NumberTraits::ToInteger<Result>::Type(a),
            typename NumberTraits::ToInteger<Result>::Type(b));
     }
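std::gcd and std::lcm from <numeric> (C++17) are drop-in replacements for the removed boost calls; a minimal standalone check:

#include <numeric>
#include <cassert>

int main()
{
    assert(std::gcd(12, 18) == 6);
    assert(std::lcm(4, 6) == 12);  // lcm(a, b) == a / gcd(a, b) * b
}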
@@ -150,7 +150,6 @@ public:

         const UInt64 mask = maskLowBits<UInt64>(to_write);
         v &= mask;
-        // assert(v <= 255);

         bits_buffer <<= to_write;
         bits_buffer |= v;
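A sketch of what the masking step above does, assuming maskLowBits<T>(n) returns a value with the n lowest bits set (the usual (1 << n) - 1 pattern; this reimplementation is illustrative, the real helper lives elsewhere in the codebase):

#include <cstdint>
#include <cassert>

template <typename T>
T maskLowBitsSketch(uint8_t n)  // illustrative stand-in for maskLowBits<T>
{
    return n == sizeof(T) * 8 ? ~T{0} : (T{1} << n) - 1;
}

int main()
{
    assert(maskLowBitsSketch<uint64_t>(4) == 0x0F);  // keep the 4 lowest bits
    uint64_t v = 0xABCD;
    v &= maskLowBitsSketch<uint64_t>(8);             // only the bits about to be written survive
    assert(v == 0xCD);
}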
@@ -29,11 +29,10 @@ namespace ErrorCodes

 void ThreadStatus::attachQueryContext(Context & query_context_)
 {
     query_context = &query_context_;
+    query_id = query_context->getCurrentQueryId();
     if (!global_context)
         global_context = &query_context->getGlobalContext();

-    query_id = query_context->getCurrentQueryId();
-
     if (thread_group)
     {
         std::lock_guard lock(thread_group->mutex);
@@ -106,6 +105,9 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool
         thread_group->thread_numbers.emplace_back(thread_number);
     }

+    if (query_context)
+        query_id = query_context->getCurrentQueryId();
+
 #if defined(__linux__)
     /// Set "nice" value if required.
     if (query_context)
@@ -269,7 +271,7 @@ void CurrentThread::attachQueryContext(Context & query_context)
 {
     if (unlikely(!current_thread))
         return;
-    return current_thread->attachQueryContext(query_context);
+    current_thread->attachQueryContext(query_context);
 }

 void CurrentThread::finalizePerformanceCounters()
@@ -178,6 +178,24 @@ const KeyCondition::AtomMap KeyCondition::atom_map
             return true;
         }
     },
+    {
+        "empty",
+        [] (RPNElement & out, const Field &)
+        {
+            out.function = RPNElement::FUNCTION_IN_RANGE;
+            out.range = Range("");
+            return true;
+        }
+    },
+    {
+        "notEmpty",
+        [] (RPNElement & out, const Field &)
+        {
+            out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
+            out.range = Range("");
+            return true;
+        }
+    },
     {
         "like",
         [] (RPNElement & out, const Field & value)
@@ -199,6 +217,27 @@ const KeyCondition::AtomMap KeyCondition::atom_map
             return true;
         }
     },
+    {
+        "notLike",
+        [] (RPNElement & out, const Field & value)
+        {
+            if (value.getType() != Field::Types::String)
+                return false;
+
+            String prefix = extractFixedPrefixFromLikePattern(value.get<const String &>());
+            if (prefix.empty())
+                return false;
+
+            String right_bound = firstStringThatIsGreaterThanAllStringsWithPrefix(prefix);
+
+            out.function = RPNElement::FUNCTION_NOT_IN_RANGE;
+            out.range = !right_bound.empty()
+                ? Range(prefix, true, right_bound, false)
+                : Range::createLeftBounded(prefix, true);
+
+            return true;
+        }
+    },
     {
         "startsWith",
         [] (RPNElement & out, const Field & value)
@@ -645,92 +684,102 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
     {
         const ASTs & args = func->arguments->children;

-        if (args.size() != 2)
-            return false;
-
         DataTypePtr key_expr_type;    /// Type of expression containing key column
-        size_t key_arg_pos;           /// Position of argument with key column (non-const argument)
         size_t key_column_num = -1;   /// Number of a key column (inside key_column_names array)
         MonotonicFunctionsChain chain;
-        bool is_set_const = false;
-        bool is_constant_transformed = false;
+        std::string func_name = func->name;

-        if (functionIsInOrGlobalInOperator(func->name)
-            && tryPrepareSetIndex(args, context, out, key_column_num))
+        if (args.size() == 1)
         {
-            key_arg_pos = 0;
-            is_set_const = true;
+            if (!(isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)))
+                return false;
+
+            if (key_column_num == static_cast<size_t>(-1))
+                throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
         }
-        else if (getConstant(args[1], block_with_constants, const_value, const_type)
-            && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
+        else if (args.size() == 2)
         {
-            key_arg_pos = 0;
-        }
-        else if (getConstant(args[1], block_with_constants, const_value, const_type)
-            && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
-        {
-            key_arg_pos = 0;
-            is_constant_transformed = true;
-        }
-        else if (getConstant(args[0], block_with_constants, const_value, const_type)
-            && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
-        {
-            key_arg_pos = 1;
-        }
-        else if (getConstant(args[0], block_with_constants, const_value, const_type)
-            && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
-        {
-            key_arg_pos = 1;
-            is_constant_transformed = true;
+            size_t key_arg_pos;           /// Position of argument with key column (non-const argument)
+            bool is_set_const = false;
+            bool is_constant_transformed = false;
+
+            if (functionIsInOrGlobalInOperator(func_name)
+                && tryPrepareSetIndex(args, context, out, key_column_num))
+            {
+                key_arg_pos = 0;
+                is_set_const = true;
+            }
+            else if (getConstant(args[1], block_with_constants, const_value, const_type)
+                && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain))
+            {
+                key_arg_pos = 0;
+            }
+            else if (getConstant(args[1], block_with_constants, const_value, const_type)
+                && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type))
+            {
+                key_arg_pos = 0;
+                is_constant_transformed = true;
+            }
+            else if (getConstant(args[0], block_with_constants, const_value, const_type)
+                && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain))
+            {
+                key_arg_pos = 1;
+            }
+            else if (getConstant(args[0], block_with_constants, const_value, const_type)
+                && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type))
+            {
+                key_arg_pos = 1;
+                is_constant_transformed = true;
+            }
+            else
+                return false;
+
+            if (key_column_num == static_cast<size_t>(-1))
+                throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
+
+            /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
+            if (is_constant_transformed)
+            {
+                if (func_name == "less")
+                    func_name = "lessOrEquals";
+                else if (func_name == "greater")
+                    func_name = "greaterOrEquals";
+            }
+
+            /// Replace <const> <sign> <data> on to <data> <-sign> <const>
+            if (key_arg_pos == 1)
+            {
+                if (func_name == "less")
+                    func_name = "greater";
+                else if (func_name == "greater")
+                    func_name = "less";
+                else if (func_name == "greaterOrEquals")
+                    func_name = "lessOrEquals";
+                else if (func_name == "lessOrEquals")
+                    func_name = "greaterOrEquals";
+                else if (func_name == "in" || func_name == "notIn" || func_name == "like")
+                {
+                    /// "const IN data_column" doesn't make sense (unlike "data_column IN const")
+                    return false;
+                }
+            }
+
+            bool cast_not_needed =
+                is_set_const /// Set args are already casted inside Set::createFromAST
+                || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
+
+            if (!cast_not_needed)
+                castValueToType(key_expr_type, const_value, const_type, node);
         }
         else
             return false;

-        if (key_column_num == static_cast<size_t>(-1))
-            throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR);
-
-        std::string func_name = func->name;
-
-        /// Transformed constant must weaken the condition, for example "x > 5" must weaken to "round(x) >= 5"
-        if (is_constant_transformed)
-        {
-            if (func_name == "less")
-                func_name = "lessOrEquals";
-            else if (func_name == "greater")
-                func_name = "greaterOrEquals";
-        }
-
-        /// Replace <const> <sign> <data> on to <data> <-sign> <const>
-        if (key_arg_pos == 1)
-        {
-            if (func_name == "less")
-                func_name = "greater";
-            else if (func_name == "greater")
-                func_name = "less";
-            else if (func_name == "greaterOrEquals")
-                func_name = "lessOrEquals";
-            else if (func_name == "lessOrEquals")
-                func_name = "greaterOrEquals";
-            else if (func_name == "in" || func_name == "notIn" || func_name == "like")
-            {
-                /// "const IN data_column" doesn't make sense (unlike "data_column IN const")
-                return false;
-            }
-        }
-
+        out.key_column = key_column_num;
+        out.monotonic_functions_chain = std::move(chain);
+
         const auto atom_it = atom_map.find(func_name);
         if (atom_it == std::end(atom_map))
             return false;

-        bool cast_not_needed =
-            is_set_const /// Set args are already casted inside Set::createFromAST
-            || (isNativeNumber(key_expr_type) && isNativeNumber(const_type)); /// Numbers are accurately compared without cast.
-
-        if (!cast_not_needed)
-            castValueToType(key_expr_type, const_value, const_type, node);
-        out.key_column = key_column_num;
-        out.monotonic_functions_chain = std::move(chain);

         return atom_it->second(out, const_value);
     }
@@ -748,7 +797,6 @@ bool KeyCondition::atomFromAST(const ASTPtr & node, const Context & context, Blo
             return true;
         }
     }
-
     return false;
 }
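To make the notLike range logic above concrete: the fixed prefix extracted from the pattern bounds the key range, and the right bound is "the first string greater than all strings with that prefix". A runnable sketch of that bound (the real helper's behavior is inferred from its name; edge-case handling here is an assumption):

#include <cassert>
#include <string>

// Illustrative stand-in for firstStringThatIsGreaterThanAllStringsWithPrefix:
// increment the last byte of the prefix, dropping trailing 0xFF bytes first.
std::string firstStringGreaterThanAllWithPrefixSketch(std::string prefix)
{
    while (!prefix.empty() && static_cast<unsigned char>(prefix.back()) == 0xFF)
        prefix.pop_back();
    if (!prefix.empty())
        ++prefix.back();
    return prefix;  // empty result means "no finite right bound"
}

int main()
{
    // Pattern 'abc%' has fixed prefix "abc"; every match lies in ["abc", "abd").
    assert(firstStringGreaterThanAllWithPrefixSketch("abc") == "abd");
    // notLike 'abc%' then becomes FUNCTION_NOT_IN_RANGE over that same range.
}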
@@ -142,7 +142,7 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
         "like",
         [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
         {
-            out.function = RPNElement::FUNCTION_LIKE;
+            out.function = RPNElement::FUNCTION_EQUALS;
             out.bloom_filter = std::make_unique<BloomFilter>(
                 idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);

@@ -151,6 +151,66 @@ const MergeTreeConditionFullText::AtomMap MergeTreeConditionFullText::atom_map
             return true;
         }
     },
+    {
+        "notLike",
+        [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
+        {
+            out.function = RPNElement::FUNCTION_NOT_EQUALS;
+            out.bloom_filter = std::make_unique<BloomFilter>(
+                idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
+
+            const auto & str = value.get<String>();
+            likeStringToBloomFilter(str, idx.token_extractor_func, *out.bloom_filter);
+            return true;
+        }
+    },
+    {
+        "startsWith",
+        [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
+        {
+            out.function = RPNElement::FUNCTION_EQUALS;
+            out.bloom_filter = std::make_unique<BloomFilter>(
+                idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
+
+            const auto & prefix = value.get<String>();
+            stringToBloomFilter(prefix.c_str(), prefix.size(), idx.token_extractor_func, *out.bloom_filter);
+            return true;
+        }
+    },
+    {
+        "endsWith",
+        [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
+        {
+            out.function = RPNElement::FUNCTION_EQUALS;
+            out.bloom_filter = std::make_unique<BloomFilter>(
+                idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
+
+            const auto & suffix = value.get<String>();
+            stringToBloomFilter(suffix.c_str(), suffix.size(), idx.token_extractor_func, *out.bloom_filter);
+            return true;
+        }
+    },
+    {
+        "multiSearchAny",
+        [] (RPNElement & out, const Field & value, const MergeTreeIndexFullText & idx)
+        {
+            out.function = RPNElement::FUNCTION_MULTI_SEARCH;
+
+            std::vector<std::vector<BloomFilter>> bloom_filters;
+            bloom_filters.emplace_back();
+            for (const auto & element : value.get<Array>())
+            {
+                if (element.getType() != Field::Types::String)
+                    return false;
+
+                bloom_filters.back().emplace_back(idx.bloom_filter_size, idx.bloom_filter_hashes, idx.seed);
+                const auto & str = element.get<String>();
+                stringToBloomFilter(str.c_str(), str.size(), idx.token_extractor_func, bloom_filters.back().back());
+            }
+            out.set_bloom_filters = std::move(bloom_filters);
+            return true;
+        }
+    },
     {
         "notIn",
         [] (RPNElement & out, const Field &, const MergeTreeIndexFullText &)
@@ -197,10 +257,9 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
         }
         else if (element.function == RPNElement::FUNCTION_EQUALS
             || element.function == RPNElement::FUNCTION_NOT_EQUALS
-            || element.function == RPNElement::FUNCTION_LIKE
-            || element.function == RPNElement::FUNCTION_NOT_LIKE
             || element.function == RPNElement::FUNCTION_IN
             || element.function == RPNElement::FUNCTION_NOT_IN
+            || element.function == RPNElement::FUNCTION_MULTI_SEARCH
             || element.function == RPNElement::ALWAYS_FALSE)
         {
             rpn_stack.push_back(false);
@@ -255,17 +314,8 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
             if (element.function == RPNElement::FUNCTION_NOT_EQUALS)
                 rpn_stack.back() = !rpn_stack.back();
         }
-        else if (element.function == RPNElement::FUNCTION_LIKE
-            || element.function == RPNElement::FUNCTION_NOT_LIKE)
-        {
-            rpn_stack.emplace_back(
-                granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
-
-            if (element.function == RPNElement::FUNCTION_NOT_LIKE)
-                rpn_stack.back() = !rpn_stack.back();
-        }
         else if (element.function == RPNElement::FUNCTION_IN
-                || element.function == RPNElement::FUNCTION_NOT_IN)
+            || element.function == RPNElement::FUNCTION_NOT_IN)
         {
             std::vector<bool> result(element.set_bloom_filters.back().size(), true);

@@ -283,6 +333,18 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
             if (element.function == RPNElement::FUNCTION_NOT_IN)
                 rpn_stack.back() = !rpn_stack.back();
         }
+        else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH)
+        {
+            std::vector<bool> result(element.set_bloom_filters.back().size(), true);
+
+            const auto & bloom_filters = element.set_bloom_filters[0];
+
+            for (size_t row = 0; row < bloom_filters.size(); ++row)
+                result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
+
+            rpn_stack.emplace_back(
+                std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
+        }
         else if (element.function == RPNElement::FUNCTION_NOT)
         {
             rpn_stack.back() = !rpn_stack.back();
@@ -343,8 +405,9 @@ bool MergeTreeConditionFullText::atomFromAST(

     size_t key_arg_pos;           /// Position of argument with key column (non-const argument)
     size_t key_column_num = -1;   /// Number of a key column (inside key_column_names array)
+    std::string func_name = func->name;

-    if (functionIsInOrGlobalInOperator(func->name) && tryPrepareSetBloomFilter(args, out))
+    if (functionIsInOrGlobalInOperator(func_name) && tryPrepareSetBloomFilter(args, out))
     {
         key_arg_pos = 0;
     }
@@ -359,17 +422,17 @@ bool MergeTreeConditionFullText::atomFromAST(
     else
         return false;

-    if (const_type && const_type->getTypeId() != TypeIndex::String && const_type->getTypeId() != TypeIndex::FixedString)
+    if (const_type && const_type->getTypeId() != TypeIndex::String
+        && const_type->getTypeId() != TypeIndex::FixedString
+        && const_type->getTypeId() != TypeIndex::Array)
         return false;

-    if (key_arg_pos == 1 && (func->name != "equals" || func->name != "notEquals"))
+    if (key_arg_pos == 1 && (func_name != "equals" || func_name != "notEquals"))
         return false;
-    else if (!index.token_extractor_func->supportLike() && (func->name == "like" || func->name == "notLike"))
+    else if (!index.token_extractor_func->supportLike() && (func_name == "like" || func_name == "notLike"))
        return false;
     else
         key_arg_pos = 0;

-    const auto atom_it = atom_map.find(func->name);
+    const auto atom_it = atom_map.find(func_name);
     if (atom_it == std::end(atom_map))
         return false;

@@ -380,8 +443,8 @@ bool MergeTreeConditionFullText::atomFromAST(
     {
         /// Check constant like in KeyCondition
         if (const_value.getType() == Field::Types::UInt64
-                || const_value.getType() == Field::Types::Int64
-                || const_value.getType() == Field::Types::Float64)
+            || const_value.getType() == Field::Types::Int64
+            || const_value.getType() == Field::Types::Float64)
         {
             /// Zero in all types is represented in memory the same way as in UInt64.
             out.function = const_value.get<UInt64>()
@@ -475,7 +538,6 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter(
     return true;
 }

-
 MergeTreeIndexGranulePtr MergeTreeIndexFullText::createIndexGranule() const
 {
     return std::make_shared<MergeTreeIndexGranuleFullText>(*this);
@@ -78,10 +78,9 @@ private:
         /// Atoms of a Boolean expression.
         FUNCTION_EQUALS,
         FUNCTION_NOT_EQUALS,
-        FUNCTION_LIKE,
-        FUNCTION_NOT_LIKE,
         FUNCTION_IN,
         FUNCTION_NOT_IN,
+        FUNCTION_MULTI_SEARCH,
         FUNCTION_UNKNOWN, /// Can take any value.
         /// Operators of the logical expression.
         FUNCTION_NOT,
@@ -93,15 +92,20 @@ private:
     };

     RPNElement(
-        Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr<BloomFilter> && const_bloom_filter_ = nullptr)
-        : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}
+            Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr<BloomFilter> && const_bloom_filter_ = nullptr)
+        : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}

     Function function = FUNCTION_UNKNOWN;
-    /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_LIKE, FUNCTION_NOT_LIKE.
+    /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS and FUNCTION_MULTI_SEARCH
     size_t key_column;
+
     /// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS
     std::unique_ptr<BloomFilter> bloom_filter;
-    /// For FUNCTION_IN and FUNCTION_NOT_IN
+
+    /// For FUNCTION_IN, FUNCTION_NOT_IN and FUNCTION_MULTI_SEARCH
     std::vector<std::vector<BloomFilter>> set_bloom_filters;
+
+    /// For FUNCTION_IN and FUNCTION_NOT_IN
     std::vector<size_t> set_key_position;
 };
@@ -411,7 +411,10 @@ static bool checkAtomName(const String & name)
         "greaterOrEquals",
         "in",
         "notIn",
-        "like"
+        "like",
+        "startsWith",
+        "endsWith",
+        "multiSearchAny"
     };
     return atoms.find(name) != atoms.end();
 }
@@ -46,13 +46,13 @@ MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream(
     for (const auto & range : all_mark_ranges)
         total_marks_count += range.end - range.begin;

-    size_t total_rows = data_part->index_granularity.getTotalRows();
+    size_t total_rows = data_part->index_granularity.getRowsCountInRanges(all_mark_ranges);

     if (!quiet)
         LOG_TRACE(log, "Reading " << all_mark_ranges.size() << " ranges from part " << data_part->name
             << ", approx. " << total_rows
             << (all_mark_ranges.size() > 1
-                ? ", up to " + toString(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges))
+                ? ", up to " + toString(total_rows)
                 : "")
             << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
dbms/tests/config/query_masking_rules.xml (new file, 10 lines)
@@ -0,0 +1,10 @@
<?xml version="1.0"?>
<!-- Config for test server -->
<yandex>
    <query_masking_rules>
        <rule>
            <regexp>TOPSECRET.TOPSECRET</regexp>
            <replace>[hidden]</replace>
        </rule>
    </query_masking_rules>
</yandex>
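Illustrative effect of this rule (the query text is hypothetical): a query containing a fragment that matches the regexp, e.g. SELECT 'TOPSECRET=TOPSECRET', would be recorded in server logs and query bookkeeping as SELECT '[hidden]' — the match is replaced before the text is written out.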
@@ -11,13 +11,14 @@ node18_14 = cluster.add_instance('node18_14', image='yandex/clickhouse-server:18
 node19_1 = cluster.add_instance('node19_1', image='yandex/clickhouse-server:19.1.16', with_installed_binary=True)
 node19_4 = cluster.add_instance('node19_4', image='yandex/clickhouse-server:19.4.5.35', with_installed_binary=True)
 node19_6 = cluster.add_instance('node19_6', image='yandex/clickhouse-server:19.6.3.18', with_installed_binary=True)
+node19_8 = cluster.add_instance('node19_8', image='yandex/clickhouse-server:19.8.3.8', with_installed_binary=True)
 node_new = cluster.add_instance('node_new')

 @pytest.fixture(scope="module")
 def setup_nodes():
     try:
         cluster.start()
-        for n in (node18_14, node19_1, node19_4, node19_6, node_new):
+        for n in (node18_14, node19_1, node19_4, node19_6, node19_8, node_new):
             n.query('''CREATE TABLE test_table (id UInt32, value UInt64) ENGINE = MergeTree() ORDER BY tuple()''')

         yield cluster
@@ -29,7 +30,7 @@ def query_from_one_node_to_another(client_node, server_node, query):
     client_node.exec_in_container(["bash", "-c", "/usr/bin/clickhouse client --host {} --query '{}'".format(server_node.name, query)])

 def test_client_from_different_versions(setup_nodes):
-    old_nodes = (node18_14, node19_1, node19_4, node19_6,)
+    old_nodes = (node18_14, node19_1, node19_4, node19_6, node19_8)
     # from new to old
     for n in old_nodes:
         query_from_one_node_to_another(node_new, n, "INSERT INTO test_table VALUES (1, 1)")
@@ -0,0 +1,53 @@
9 abra
14 abracadabra
"rows_read": 6,
8 computer science
"rows_read": 2,
9 abra
10 cadabra
11 crabacadabra
14 abracadabra
15 cadabraabra
"rows_read": 6,
6 some string
7 another string
"rows_read": 2,
9 abra
14 abracadabra
"rows_read": 6,
8 computer science
"rows_read": 2,
1 ClickHouse is a column-oriented database management system (DBMS)
2 column-oriented database management system
13 basement
"rows_read": 6,
6 some string
7 another string
"rows_read": 2,
6 some string
7 another string
8 computer science
"rows_read": 4,
1 ClickHouse is a column-oriented database management system (DBMS)
2 column-oriented database management system
13 basement
"rows_read": 6,
9 abra
10 cadabra
11 crabacadabra
14 abracadabra
15 cadabraabra
"rows_read": 6,
4 какая-то строка
5 еще строка
6 some string
7 another string
"rows_read": 4,
14 abracadabra
"rows_read": 4,
1 ClickHouse is a column-oriented database management system (DBMS)
2 column-oriented database management system
10 cadabra
11 crabacadabra
15 cadabraabra
"rows_read": 8,
86 dbms/tests/queries/0_stateless/00964_bloom_index_string_functions.sh Executable file
@@ -0,0 +1,86 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx;"

# NGRAM BF
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx
(
    k UInt64,
    s String,
    -- ngrambf_v1(n-gram size, bloom filter size in bytes, number of hash functions, random seed)
    INDEX bf (s, lower(s)) TYPE ngrambf_v1(3, 512, 2, 0) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY k
SETTINGS index_granularity = 2;"

$CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx VALUES
(0, 'ClickHouse - столбцовая система управления базами данных (СУБД)'),
(1, 'ClickHouse is a column-oriented database management system (DBMS)'),
(2, 'column-oriented database management system'),
(3, 'columns'),
(4, 'какая-то строка'),
(5, 'еще строка'),
(6, 'some string'),
(7, 'another string'),
(8, 'computer science'),
(9, 'abra'),
(10, 'cadabra'),
(11, 'crabacadabra'),
(12, 'crab'),
(13, 'basement'),
(14, 'abracadabra'),
(15, 'cadabraabra')"

# STARTS_WITH
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'computer') ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'computer') ORDER BY k FORMAT JSON" | grep "rows_read"

# ENDS_WITH
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'abra') ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'abra') ORDER BY k FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'ring') ORDER BY k"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE endsWith(s, 'ring') ORDER BY k FORMAT JSON" | grep "rows_read"

# COMBINED
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra')"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra') FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'c') AND endsWith(s, 'science')"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE startsWith(s, 'c') AND endsWith(s, 'science') FORMAT JSON" | grep "rows_read"

# MULTI_SEARCH_ANY
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['data', 'base'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['data', 'base']) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string']) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string', 'computer'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['string', 'computer']) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['base', 'seme', 'gement'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['base', 'seme', 'gement']) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['abra', 'cadabra', 'cab', 'extra'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['abra', 'cadabra', 'cab', 'extra']) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['строка', 'string'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['строка', 'string']) FORMAT JSON" | grep "rows_read"

# MULTI_SEARCH_ANY + OTHER

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND startsWith(s, 'abra')"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND startsWith(s, 'abra') FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND (startsWith(s, 'c') OR startsWith(s, 'C'))"
$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE multiSearchAny(s, ['adab', 'cad', 'aba']) AND (startsWith(s, 'c') OR startsWith(s, 'C')) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx;"
@@ -0,0 +1,16 @@
9 abra
14 abracadabra
"rows_read": 4,
9 abra
10 cadabra
11 crabacadabra
14 abracadabra
15 cadabraabra
"rows_read": 6,
9 abra
14 abracadabra
"rows_read": 4,
1 ClickHouse is a column-oriented database management system (DBMS)
2 column-oriented database management system
13 basement
"rows_read": 6,
53 dbms/tests/queries/0_stateless/00965_set_index_string_functions.sh Executable file
@@ -0,0 +1,53 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;"

$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE set_idx
(
    k UInt64,
    s String,
    -- set(max_rows): store at most 2 distinct values per index granule
    INDEX idx (s) TYPE set(2) GRANULARITY 1
) ENGINE = MergeTree()
ORDER BY k
SETTINGS index_granularity = 2;"

$CLICKHOUSE_CLIENT --query="INSERT INTO set_idx VALUES
(0, 'ClickHouse - столбцовая система управления базами данных (СУБД)'),
(1, 'ClickHouse is a column-oriented database management system (DBMS)'),
(2, 'column-oriented database management system'),
(3, 'columns'),
(4, 'какая-то строка'),
(5, 'еще строка'),
(6, 'some string'),
(7, 'another string'),
(8, 'computer science'),
(9, 'abra'),
(10, 'cadabra'),
(11, 'crabacadabra'),
(12, 'crab'),
(13, 'basement'),
(14, 'abracadabra'),
(15, 'cadabraabra')"

# STARTS_WITH
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra')"
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra') FORMAT JSON" | grep "rows_read"

# ENDS_WITH
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE endsWith(s, 'abra')"
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE endsWith(s, 'abra') FORMAT JSON" | grep "rows_read"

# COMBINED
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra')"
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE startsWith(s, 'abra') AND endsWith(s, 'abra') FORMAT JSON" | grep "rows_read"

# MULTI_SEARCH_ANY
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE multiSearchAny(s, ['data', 'base'])"
$CLICKHOUSE_CLIENT --query="SELECT * FROM set_idx WHERE multiSearchAny(s, ['data', 'base']) FORMAT JSON" | grep "rows_read"

$CLICKHOUSE_CLIENT --query="DROP TABLE set_idx;"
@@ -0,0 +1,2 @@
1000000
1000000
@@ -0,0 +1,22 @@
DROP TABLE IF EXISTS merge_tree;
CREATE TABLE merge_tree (x UInt8) ENGINE = MergeTree ORDER BY x;
INSERT INTO merge_tree SELECT 0 FROM numbers(1000000);

SET max_threads = 4;
SET max_rows_to_read = 1100000;

SET merge_tree_uniform_read_distribution = 1;
SELECT count() FROM merge_tree;

SET merge_tree_uniform_read_distribution = 0;
SELECT count() FROM merge_tree;

SET max_rows_to_read = 900000;

SET merge_tree_uniform_read_distribution = 1;
SELECT count() FROM merge_tree; -- { serverError 158 }

SET merge_tree_uniform_read_distribution = 0;
SELECT count() FROM merge_tree; -- { serverError 158 }

DROP TABLE merge_tree;
9 dbms/tests/queries/0_stateless/00971_query_id_in_logs.sh Executable file
@@ -0,0 +1,9 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh

set -e

# No log lines without query id
$CLICKHOUSE_CLIENT --send_logs_level=trace --query_id=hello --query="SELECT count() FROM numbers(10)" 2>&1 | grep -vF ' {hello} ' | grep -P '<\w+>' ||:
@@ -117,4 +117,10 @@
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
    <format_schema_path>/tmp/clickhouse/data/format_schemas/</format_schema_path>
    <query_masking_rules>
        <rule>
            <regexp>TOPSECRET.TOPSECRET</regexp>
            <replace>[hidden]</replace>
        </rule>
    </query_masking_rules>
</yandex>
@@ -39,6 +39,7 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
    ln -s /usr/share/clickhouse-test/config/zookeeper.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/listen.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/part_log.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/; \
    ln -s /usr/share/clickhouse-test/config/log_queries.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/readonly.xml /etc/clickhouse-server/users.d/; \
    ln -s /usr/share/clickhouse-test/config/ints_dictionary.xml /etc/clickhouse-server/; \
@@ -323,7 +323,7 @@ When using this format, ClickHouse outputs rows as separated, newline-delimited
```json
{"SearchPhrase":"curtain designs","count()":"1064"}
{"SearchPhrase":"baku","count()":"1000"}
{"SearchPhrase":"","count":"8267016"}
{"SearchPhrase":"","count()":"8267016"}
```

When inserting the data, you should provide a separate JSON object for each row.
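For example, two rows for a hypothetical `UserActivity` table (the table and column names here are illustrative, not part of the original text) are passed as two standalone objects rather than as a JSON array:

```sql
INSERT INTO UserActivity FORMAT JSONEachRow {"UserID":43241820,"PageViews":5,"Duration":146} {"UserID":43241820,"PageViews":6,"Duration":185}
```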
@@ -386,6 +386,60 @@ Unlike the [JSON](#json) format, there is no substitution of invalid UTF-8 seque
!!! note "Note"
    Any set of bytes can be output in the strings. Use the `JSONEachRow` format if you are sure that the data in the table can be formatted as JSON without losing any information.

### Usage of Nested Structures {#jsoneachrow-nested}

If you have a table with [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data that has the same structure. Enable this functionality with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.

For example, consider the following table:

```sql
CREATE TABLE json_each_row_nested (n Nested (s String, i Int32) ) ENGINE = Memory
```

As described for the `Nested` data type, ClickHouse treats each component of the nested structure as a separate column (`n.s` and `n.i` for our table), so you can insert the data the following way:

```sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]}
```

To insert the data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).

```json
{
    "n": {
        "s": ["abc", "def"],
        "i": [1, 23]
    }
}
```

Without this setting, ClickHouse throws an exception.

```sql
SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json'
```
```text
┌─name────────────────────────────┬─value─┐
│ input_format_import_nested_json │ 0     │
└─────────────────────────────────┴───────┘
```
```sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
```
```text
Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1)
```
```sql
SET input_format_import_nested_json=1
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
SELECT * FROM json_each_row_nested
```
```text
┌─n.s───────────┬─n.i────┐
│ ['abc','def'] │ [1,23] │
└───────────────┴────────┘
```

## Native {#native}

The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is "columnar" – it doesn't convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients.
@@ -231,6 +231,25 @@ Possible values:

Default value: 0.

## input_format_import_nested_json {#settings-input_format_import_nested_json}

Enables or disables the insertion of JSON data with nested objects.

Supported formats:

- [JSONEachRow](../../interfaces/formats.md#jsoneachrow)

Possible values:

- 0 — Disabled.
- 1 — Enabled.

Default value: 0.

**See Also**

- [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.

## input_format_with_names_use_header {#settings-input_format_with_names_use_header}

Enables or disables checking the column order when inserting data.
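A minimal sketch of the effect (the table `t` and the data below are hypothetical): with the check enabled, header-carrying formats such as `CSVWithNames` may deliver the columns in a different order than in the table, since the input columns are matched to table columns by the names in the header.

```sql
SET input_format_with_names_use_header = 1;
-- Assuming a table t(a Int32, b String): the header lists b first,
-- yet each value is routed to the right column by name.
INSERT INTO t FORMAT CSVWithNames
"b","a"
"hello",1
```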
@@ -249,6 +268,27 @@ Possible values:

Default value: 1.

## date_time_input_format {#settings-date_time_input_format}

Enables or disables extended parsing of date and time formatted strings.

The setting doesn't apply to [date and time functions](../../query_language/functions/date_time_functions.md).

Possible values:

- `'best_effort'` — Enables extended parsing.

    ClickHouse can parse the basic format `YYYY-MM-DD HH:MM:SS` and all the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`.

- `'basic'` — Use the basic parser.

    ClickHouse can parse only the basic format.
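A short illustration (sketch only; the table `events` with a `DateTime` column is hypothetical):

```sql
SET date_time_input_format = 'best_effort';
-- Extended parsing accepts ISO 8601 strings in text input formats:
INSERT INTO events FORMAT CSV "2018-06-08T01:02:03.000Z"

SET date_time_input_format = 'basic';
-- The basic parser accepts only the YYYY-MM-DD HH:MM:SS form:
INSERT INTO events FORMAT CSV "2018-06-08 01:02:03"
```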
**See Also**

- [DateTime data type.](../../data_types/datetime.md)
- [Functions for working with dates and times.](../../query_language/functions/date_time_functions.md)

## join_default_strictness {#settings-join_default_strictness}

Sets the default strictness for [JOIN clauses](../../query_language/select.md#select-join).
@@ -58,11 +58,10 @@ arrayConcat(arrays)
- `arrays` – Arbitrary number of arguments of [Array](../../data_types/array.md) type.
**Example**

``` sql
```sql
SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res
```

```
```text
┌─res───────────┐
│ [1,2,3,4,5,6] │
└───────────────┘
@@ -204,7 +203,7 @@ Returns the array \[1, 2, 3, ..., length (arr) \]

This function is normally used with ARRAY JOIN. It allows counting something just once for each array after applying ARRAY JOIN. Example:

``` sql
```sql
SELECT
    count() AS Reaches,
    countIf(num = 1) AS Hits
@@ -215,8 +214,7 @@ ARRAY JOIN
WHERE CounterID = 160656
LIMIT 10
```

```
```text
┌─Reaches─┬──Hits─┐
│ 95606 │ 31406 │
└─────────┴───────┘
@@ -224,15 +222,14 @@ LIMIT 10

In this example, Reaches is the number of conversions (the strings received after applying ARRAY JOIN), and Hits is the number of pageviews (strings before ARRAY JOIN). In this particular case, you can get the same result in an easier way:

``` sql
```sql
SELECT
    sum(length(GoalsReached)) AS Reaches,
    count() AS Hits
FROM test.hits
WHERE (CounterID = 160656) AND notEmpty(GoalsReached)
```

```
```text
┌─Reaches─┬──Hits─┐
│ 95606 │ 31406 │
└─────────┴───────┘
@@ -248,7 +245,7 @@ For example: arrayEnumerateUniq(\[10, 20, 10, 30\]) = \[1, 1, 2, 1\].
This function is useful when using ARRAY JOIN and aggregation of array elements.
Example:

``` sql
```sql
SELECT
    Goals.ID AS GoalID,
    sum(Sign) AS Reaches,
@@ -262,8 +259,7 @@ GROUP BY GoalID
ORDER BY Reaches DESC
LIMIT 10
```

```
```text
┌──GoalID─┬─Reaches─┬─Visits─┐
│ 53225 │ 3214 │ 1097 │
│ 2825062 │ 3188 │ 1097 │
@@ -282,11 +278,10 @@ In this example, each goal ID has a calculation of the number of conversions (ea

The arrayEnumerateUniq function can take multiple arrays of the same size as arguments. In this case, uniqueness is considered for tuples of elements in the same positions in all the arrays.

``` sql
```sql
SELECT arrayEnumerateUniq([1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 1, 2]) AS res
```

```
```text
┌─res───────────┐
│ [1,2,1,1,2,1] │
└───────────────┘
@@ -308,11 +303,10 @@ arrayPopBack(array)

**Example**

``` sql
```sql
SELECT arrayPopBack([1, 2, 3]) AS res
```

```
```text
┌─res───┐
│ [1,2] │
└───────┘
@@ -332,11 +326,10 @@ arrayPopFront(array)

**Example**

``` sql
```sql
SELECT arrayPopFront([1, 2, 3]) AS res
```

```
```text
┌─res───┐
│ [2,3] │
└───────┘
@@ -357,11 +350,10 @@ arrayPushBack(array, single_value)

**Example**

``` sql
```sql
SELECT arrayPushBack(['a'], 'b') AS res
```

```
```text
┌─res───────┐
│ ['a','b'] │
└───────────┘
@@ -382,11 +374,10 @@ arrayPushFront(array, single_value)

**Example**

``` sql
```sql
SELECT arrayPushBack(['b'], 'a') AS res
```

```
```text
┌─res───────┐
│ ['a','b'] │
└───────────┘
@@ -446,11 +437,10 @@ arraySlice(array, offset[, length])

**Example**

``` sql
```sql
SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res
```

```
```text
┌─res────────┐
│ [2,NULL,4] │
└────────────┘
@@ -464,10 +454,10 @@ Sorts the elements of the `arr` array in ascending order. If the `func` function

Example of sorting integer values:

``` sql
```sql
SELECT arraySort([1, 3, 3, 0]);
```
```
```text
┌─arraySort([1, 3, 3, 0])─┐
│ [0,1,3,3] │
└─────────────────────────┘
@@ -475,10 +465,10 @@ SELECT arraySort([1, 3, 3, 0]);

Example of sorting string values:

``` sql
```sql
SELECT arraySort(['hello', 'world', '!']);
```
```
```text
┌─arraySort(['hello', 'world', '!'])─┐
│ ['!','hello','world'] │
└────────────────────────────────────┘
@@ -486,10 +476,10 @@ SELECT arraySort(['hello', 'world', '!']);

Consider the following sorting order for the `NULL`, `NaN` and `Inf` values:

``` sql
```sql
SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]);
```
```
```text
┌─arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf])─┐
│ [-inf,-4,1,2,3,inf,nan,nan,NULL,NULL] │
└───────────────────────────────────────────────────────────┘
@@ -504,10 +494,10 @@ Note that `arraySort` is a [higher-order function](higher_order_functions.md). Y

Let's consider the following example:

``` sql
```sql
SELECT arraySort((x) -> -x, [1, 2, 3]) as res;
```
```
```text
┌─res─────┐
│ [3,2,1] │
└─────────┘
@@ -517,11 +507,10 @@ For each element of the source array, the lambda function returns the sorting ke

The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of the lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:

``` sql
```sql
SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]) as res;
```

```
```text
┌─res────────────────┐
│ ['world', 'hello'] │
└────────────────────┘
@@ -531,19 +520,19 @@ Here, the elements that are passed in the second array ([2, 1]) define a sorting

Other examples are shown below.

``` sql
```sql
SELECT arraySort((x, y) -> y, [0, 1, 2], ['c', 'b', 'a']) as res;
```
``` sql
```text
┌─res─────┐
│ [2,1,0] │
└─────────┘
```

``` sql
```sql
SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res;
```
``` sql
```text
┌─res─────┐
│ [2,1,0] │
└─────────┘
@@ -558,10 +547,10 @@ Sorts the elements of the `arr` array in descending order. If the `func` functio

Example of sorting integer values:

``` sql
```sql
SELECT arrayReverseSort([1, 3, 3, 0]);
```
```
```text
┌─arrayReverseSort([1, 3, 3, 0])─┐
│ [3,3,1,0] │
└────────────────────────────────┘
@@ -569,10 +558,10 @@ SELECT arrayReverseSort([1, 3, 3, 0]);

Example of sorting string values:

``` sql
```sql
SELECT arrayReverseSort(['hello', 'world', '!']);
```
```
```text
┌─arrayReverseSort(['hello', 'world', '!'])─┐
│ ['world','hello','!'] │
└───────────────────────────────────────────┘
@@ -580,10 +569,10 @@ SELECT arrayReverseSort(['hello', 'world', '!']);

Consider the following sorting order for the `NULL`, `NaN` and `Inf` values:

``` sql
```sql
SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res;
```
``` sql
```text
┌─res───────────────────────────────────┐
│ [inf,3,2,1,-4,-inf,nan,nan,NULL,NULL] │
└───────────────────────────────────────┘
@@ -596,10 +585,10 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res;

Note that `arrayReverseSort` is a [higher-order function](higher_order_functions.md). You can pass a lambda function to it as the first argument. An example is shown below.

``` sql
```sql
SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res;
```
```
```text
┌─res─────┐
│ [1,2,3] │
└─────────┘
@@ -612,10 +601,10 @@ The array is sorted in the following way:

The lambda function can accept multiple arguments. In this case, you need to pass the `arrayReverseSort` function several arrays of identical length that the arguments of the lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:

``` sql
```sql
SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res;
```
``` sql
```text
┌─res───────────────┐
│ ['hello','world'] │
└───────────────────┘
@@ -628,18 +617,18 @@ In this example, the array is sorted in the following way:

Other examples are shown below.

``` sql
```sql
SELECT arrayReverseSort((x, y) -> y, [4, 3, 5], ['a', 'b', 'c']) AS res;
```
``` sql
```text
┌─res─────┐
│ [5,3,4] │
└─────────┘
```

``` sql
```sql
SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res;
```
``` sql
```text
┌─res─────┐
│ [4,3,5] │
└─────────┘
@@ -212,7 +212,7 @@ SELECT metroHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:
## jumpConsistentHash

Calculates JumpConsistentHash from a UInt64.
Accepts a UInt64-type argument. Returns Int32.
Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32.
For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf)
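A minimal example (the key and the bucket count here are arbitrary):

```sql
-- Maps key 42 to one of 10 buckets (0..9); when the bucket count grows,
-- only a small fraction of keys are remapped to new buckets.
SELECT jumpConsistentHash(toUInt64(42), 10) AS bucket
```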

## murmurHash2_32, murmurHash2_64
@@ -328,6 +328,60 @@ JSON is compatible with JavaScript. To ensure…

For parsing, any order of values in the different columns is supported. Omitting some values is acceptable: omitted values are treated as equal to their default values. In this case, zeros and blank rows are used as the default values. Complex values that could be specified in the table are not supported as defaults. Whitespace between elements is ignored. A comma placed after an object is ignored. Objects do not necessarily have to be separated by new lines.

### Usage of Nested Structures {#jsoneachrow-nested}

If you have a table with [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data that has the same structure. Enable this functionality with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.

For example, consider the following table:

```sql
CREATE TABLE json_each_row_nested (n Nested (s String, i Int32) ) ENGINE = Memory
```

As described for the `Nested` data type, ClickHouse treats each component of the nested structure as a separate column (`n.s` and `n.i` for our table), so you can insert the data the following way:

```sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]}
```

To insert the data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).

```json
{
    "n": {
        "s": ["abc", "def"],
        "i": [1, 23]
    }
}
```

Without this setting, ClickHouse throws an exception.

```sql
SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json'
```
```text
┌─name────────────────────────────┬─value─┐
│ input_format_import_nested_json │ 0     │
└─────────────────────────────────┴───────┘
```
```sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
```
```text
Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1)
```
```sql
SET input_format_import_nested_json=1
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
SELECT * FROM json_each_row_nested
```
```text
┌─n.s───────────┬─n.i────┐
│ ['abc','def'] │ [1,23] │
└───────────────┴────────┘
```

## Native

The most efficient format. Data is written and read in binary format by blocks. For each block, the number of rows, the number of columns, the column names and types, and the parts of the columns in this block are recorded one after another. In other words, this format is "columnar": it does not convert columns to rows. This format is used in the native interface, between the server and the command-line client, and by the C++ client.
@@ -327,6 +327,60 @@ ClickHouse supports [NULL](../query_language/syntax.md), which is displayed as `null` in the JSON format…

For parsing, the values of the different columns may come in any order. Some values may be omitted: they are treated as equal to their default values. In this case, zeros and blank rows are used as the default values. Complex values that could be specified in the table are not supported as defaults. Whitespace characters between elements are ignored. A comma placed after an object is ignored. Objects do not necessarily have to be separated by new lines.

### Usage of Nested Structures {#jsoneachrow-nested}

If you have a table with [Nested](../data_types/nested_data_structures/nested.md) data type columns, you can insert JSON data that has the same structure. Enable this functionality with the [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json) setting.

For example, consider the following table:

```sql
CREATE TABLE json_each_row_nested (n Nested (s String, i Int32) ) ENGINE = Memory
```

As described for the `Nested` data type, ClickHouse treats each component of the nested structure as a separate column (`n.s` and `n.i` for our table), so you can insert the data the following way:

```sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]}
```

To insert the data as a hierarchical JSON object, set [input_format_import_nested_json=1](../operations/settings/settings.md#settings-input_format_import_nested_json).

```json
{
    "n": {
        "s": ["abc", "def"],
        "i": [1, 23]
    }
}
```

Without this setting, ClickHouse throws an exception.

```sql
SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json'
```
```text
┌─name────────────────────────────┬─value─┐
│ input_format_import_nested_json │ 0     │
└─────────────────────────────────┴───────┘
```
```sql
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
```
```text
Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1)
```
```sql
SET input_format_import_nested_json=1
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
SELECT * FROM json_each_row_nested
```
```text
┌─n.s───────────┬─n.i────┐
│ ['abc','def'] │ [1,23] │
└───────────────┴────────┘
```

## Native {#native}

The highest-performance format. Data is written and read by blocks in binary format. For each block, the number of rows, the number of columns, the column names and types, and the parts of the columns in this block are recorded one after another. In other words, this format is "columnar": it does not convert columns to rows. This is the format used in the native interface for interaction between servers, for the command-line client, and for C++ clients.