mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into zvonand-issue-49290
This commit is contained in:
commit
4884022fda
@ -1255,3 +1255,15 @@ Result:
|
||||
│ A240 │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## initcap
|
||||
|
||||
Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
|
||||
|
||||
## initcapUTF8
|
||||
|
||||
Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
|
||||
|
||||
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
|
||||
|
@ -1113,3 +1113,14 @@ A text with tags .
|
||||
The content within <b>CDATA</b>
|
||||
Do Nothing for 2 Minutes 2:00
|
||||
```
|
||||
|
||||
## initcap {#initcap}
|
||||
|
||||
Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами.
|
||||
|
||||
## initcapUTF8 {#initcapUTF8}
|
||||
|
||||
Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8.
|
||||
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
|
||||
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
|
||||
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/Arena.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
@ -6223,7 +6223,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
const auto & insertion_table = scope_context->getInsertionTable();
|
||||
if (!insertion_table.empty())
|
||||
{
|
||||
const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
|
||||
const auto & insert_structure = DatabaseCatalog::instance()
|
||||
.getTable(insertion_table, scope_context)
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns()
|
||||
.getInsertable();
|
||||
DB::ColumnsDescription structure_hint;
|
||||
|
||||
bool use_columns_from_insert_query = true;
|
||||
|
@ -34,7 +34,7 @@ try
|
||||
DB::Memory<> memory;
|
||||
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
|
||||
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ try
|
||||
DB::Memory<> memory;
|
||||
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
|
||||
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -292,10 +292,10 @@ try
|
||||
|
||||
DB::Memory<> memory;
|
||||
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
|
||||
codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
|
||||
codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
|
||||
|
||||
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
|
||||
codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
|
||||
codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
|
||||
return 0;
|
||||
}
|
||||
catch (...)
|
||||
|
@ -24,7 +24,7 @@ try
|
||||
return 0;
|
||||
|
||||
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
|
||||
auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));
|
||||
auto codec = DB::getCompressionCodecLZ4(p->level);
|
||||
|
||||
size_t output_buffer_size = p->decompressed_size % 65536;
|
||||
size -= sizeof(AuxiliaryRandomData);
|
||||
@ -37,7 +37,7 @@ try
|
||||
DB::Memory<> memory;
|
||||
memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
|
||||
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -28,7 +28,6 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int TOO_DEEP_RECURSION;
|
||||
}
|
||||
|
||||
constexpr Null NEGATIVE_INFINITY{Null::Value::NegativeInfinity};
|
||||
@ -42,13 +41,10 @@ using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;
|
||||
/// construct a Field of Array or a Tuple type. An alternative approach would be
|
||||
/// to construct both of these types from FieldVector, and have the caller
|
||||
/// specify the desired Field type explicitly.
|
||||
/// As the result stack overflow on destruction is possible
|
||||
/// and to avoid it we need to count the depth and have a threshold.
|
||||
#define DEFINE_FIELD_VECTOR(X) \
|
||||
struct X : public FieldVector \
|
||||
{ \
|
||||
using FieldVector::FieldVector; \
|
||||
uint8_t nested_field_depth = 0; \
|
||||
}
|
||||
|
||||
DEFINE_FIELD_VECTOR(Array);
|
||||
@ -65,7 +61,6 @@ using FieldMap = std::map<String, Field, std::less<>, AllocatorWithMemoryTrackin
|
||||
struct X : public FieldMap \
|
||||
{ \
|
||||
using FieldMap::FieldMap; \
|
||||
uint8_t nested_field_depth = 0; \
|
||||
}
|
||||
|
||||
DEFINE_FIELD_MAP(Object);
|
||||
@ -296,12 +291,6 @@ decltype(auto) castToNearestFieldType(T && x)
|
||||
*/
|
||||
#define DBMS_MIN_FIELD_SIZE 32
|
||||
|
||||
/// Note: uint8_t is used for storing depth value.
|
||||
#if defined(SANITIZER) || !defined(NDEBUG)
|
||||
#define DBMS_MAX_NESTED_FIELD_DEPTH 64
|
||||
#else
|
||||
#define DBMS_MAX_NESTED_FIELD_DEPTH 255
|
||||
#endif
|
||||
|
||||
/** Discriminated union of several types.
|
||||
* Made for replacement of `boost::variant`
|
||||
@ -682,49 +671,6 @@ private:
|
||||
|
||||
Types::Which which;
|
||||
|
||||
/// StorageType and Original are the same for Array, Tuple, Map, Object
|
||||
template <typename StorageType, typename Original>
|
||||
uint8_t calculateAndCheckFieldDepth(Original && x)
|
||||
{
|
||||
uint8_t result = 0;
|
||||
|
||||
if constexpr (std::is_same_v<StorageType, Array>
|
||||
|| std::is_same_v<StorageType, Tuple>
|
||||
|| std::is_same_v<StorageType, Map>
|
||||
|| std::is_same_v<StorageType, Object>)
|
||||
{
|
||||
result = x.nested_field_depth;
|
||||
|
||||
auto get_depth = [](const Field & elem)
|
||||
{
|
||||
switch (elem.which)
|
||||
{
|
||||
case Types::Array:
|
||||
return elem.template get<Array>().nested_field_depth;
|
||||
case Types::Tuple:
|
||||
return elem.template get<Tuple>().nested_field_depth;
|
||||
case Types::Map:
|
||||
return elem.template get<Map>().nested_field_depth;
|
||||
case Types::Object:
|
||||
return elem.template get<Object>().nested_field_depth;
|
||||
default:
|
||||
return static_cast<uint8_t>(0);
|
||||
}
|
||||
};
|
||||
|
||||
if constexpr (std::is_same_v<StorageType, Object>)
|
||||
for (auto & [_, value] : x)
|
||||
result = std::max(get_depth(value), result);
|
||||
else
|
||||
for (auto & value : x)
|
||||
result = std::max(get_depth(value), result);
|
||||
}
|
||||
|
||||
if (result >= DBMS_MAX_NESTED_FIELD_DEPTH)
|
||||
throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Assuming there was no allocated state or it was deallocated (see destroy).
|
||||
template <typename T>
|
||||
@ -738,17 +684,7 @@ private:
|
||||
// we must initialize the entire wide stored type, and not just the
|
||||
// nominal type.
|
||||
using StorageType = NearestFieldType<UnqualifiedType>;
|
||||
|
||||
/// Incrementing the depth since we create a new Field.
|
||||
auto depth = calculateAndCheckFieldDepth<StorageType>(x);
|
||||
new (&storage) StorageType(std::forward<T>(x));
|
||||
|
||||
if constexpr (std::is_same_v<StorageType, Array>
|
||||
|| std::is_same_v<StorageType, Tuple>
|
||||
|| std::is_same_v<StorageType, Map>
|
||||
|| std::is_same_v<StorageType, Object>)
|
||||
reinterpret_cast<StorageType *>(&storage)->nested_field_depth = depth + 1;
|
||||
|
||||
which = TypeToEnum<UnqualifiedType>::value;
|
||||
}
|
||||
|
||||
@ -845,7 +781,7 @@ private:
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ALWAYS_INLINE void destroy()
|
||||
void destroy()
|
||||
{
|
||||
T * MAY_ALIAS ptr = reinterpret_cast<T*>(&storage);
|
||||
ptr->~T();
|
||||
|
@ -62,7 +62,7 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const
|
||||
}
|
||||
else
|
||||
{
|
||||
ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth);
|
||||
ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth);
|
||||
}
|
||||
|
||||
return getImpl<nullptr_on_error>(ast);
|
||||
|
@ -1521,10 +1521,8 @@ struct Transformer
|
||||
if constexpr (std::is_same_v<Additions, DateTimeAccurateConvertStrategyAdditions>
|
||||
|| std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
|
||||
{
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
|
||||
bool is_valid_input = vec_from[i] >= 0 && vec_from[i] <= 0xFFFFFFFFL;
|
||||
# pragma clang diagnostic pop
|
||||
|
||||
if (!is_valid_input)
|
||||
{
|
||||
if constexpr (std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
|
||||
|
@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl
|
||||
}
|
||||
else
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
size_t src_sequence_length = UTF8::seqLength(*src);
|
||||
/// In case partial buffer was passed (due to SSE optimization)
|
||||
/// we cannot convert it with current src_end, but we may have more
|
||||
|
66
src/Functions/initcap.cpp
Normal file
66
src/Functions/initcap.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
struct InitcapImpl
|
||||
{
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (data.empty())
|
||||
return;
|
||||
res_data.resize(data.size());
|
||||
res_offsets.assign(offsets);
|
||||
array(data.data(), data.data() + data.size(), res_data.data());
|
||||
}
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
|
||||
{
|
||||
res_data.resize(data.size());
|
||||
array(data.data(), data.data() + data.size(), res_data.data());
|
||||
}
|
||||
|
||||
private:
|
||||
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
|
||||
{
|
||||
bool prev_alphanum = false;
|
||||
|
||||
for (; src < src_end; ++src, ++dst)
|
||||
{
|
||||
char c = *src;
|
||||
bool alphanum = isAlphaNumericASCII(c);
|
||||
if (alphanum && !prev_alphanum)
|
||||
if (isAlphaASCII(c))
|
||||
*dst = toUpperIfAlphaASCII(c);
|
||||
else
|
||||
*dst = c;
|
||||
else if (isAlphaASCII(c))
|
||||
*dst = toLowerIfAlphaASCII(c);
|
||||
else
|
||||
*dst = c;
|
||||
prev_alphanum = alphanum;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct NameInitcap
|
||||
{
|
||||
static constexpr auto name = "initcap";
|
||||
};
|
||||
using FunctionInitcap = FunctionStringToString<InitcapImpl, NameInitcap>;
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(Initcap)
|
||||
{
|
||||
factory.registerFunction<FunctionInitcap>({}, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
114
src/Functions/initcapUTF8.cpp
Normal file
114
src/Functions/initcapUTF8.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/LowerUpperUTF8Impl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Poco/Unicode.h>
|
||||
|
||||
|
||||
namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace
{

/// UTF-8-aware implementation of initcapUTF8(): uppercases the first
/// alphabetic code point of each word and lowercases the rest, where a word
/// is a run of alphanumeric code points (per Poco::Unicode classification).
/// Code points whose case-converted form has a different UTF-8 byte length
/// are passed through byte-by-byte instead (see processCodePoint), so the
/// result may be incorrect for such characters (e.g. ß/ẞ).
struct InitcapUTF8Impl
{
    static void vector(
        const ColumnString::Chars & data,
        const ColumnString::Offsets & offsets,
        ColumnString::Chars & res_data,
        ColumnString::Offsets & res_offsets)
    {
        if (data.empty())
            return;
        /// Output has exactly the same byte length as the input: conversions
        /// that would change the sequence length are rejected below, so the
        /// input offsets can be reused as-is.
        res_data.resize(data.size());
        res_offsets.assign(offsets);
        array(data.data(), data.data() + data.size(), offsets, res_data.data());
    }

    /// FixedString input is not supported: case conversion could change byte
    /// lengths and row boundaries are needed to isolate invalid UTF-8.
    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
    }

    /// Convert one code point, advancing src and dst past the bytes consumed
    /// and produced. prev_alphanum tracks whether the previous code point was
    /// alphanumeric (i.e. whether we are inside a word).
    /// Fallback behavior: if the bytes do not decode as UTF-8, the converted
    /// code point is non-positive, or the converted sequence length differs
    /// from the source sequence length, a single raw byte is copied and the
    /// word state is reset. NOTE(review): in that last case
    /// convertCodePointToUTF8 has already written into dst; those bytes are
    /// overwritten by subsequent single-byte copies since dst advances by 1 —
    /// presumably intended, but worth confirming.
    static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum)
    {
        size_t src_sequence_length = UTF8::seqLength(*src);
        auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);

        if (src_code_point)
        {
            bool alpha = Poco::Unicode::isAlpha(*src_code_point);
            bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point);

            int dst_code_point = *src_code_point;
            if (alphanum && !prev_alphanum)
            {
                /// First character of a word: uppercase if it is a letter
                /// (digits start a word but are left unchanged).
                if (alpha)
                    dst_code_point = Poco::Unicode::toUpper(*src_code_point);
            }
            else if (alpha)
            {
                /// Inside a word (or a letter outside word start position):
                /// lowercase it.
                dst_code_point = Poco::Unicode::toLower(*src_code_point);
            }
            prev_alphanum = alphanum;
            if (dst_code_point > 0)
            {
                size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
                assert(dst_sequence_length <= 4);

                /// Only accept the conversion when it keeps the byte length,
                /// so offsets computed from the input remain valid.
                if (dst_sequence_length == src_sequence_length)
                {
                    src += dst_sequence_length;
                    dst += dst_sequence_length;
                    return;
                }
            }
        }

        /// Fallback: copy one raw byte and treat it as a word boundary.
        *dst = *src;
        ++dst;
        ++src;
        prev_alphanum = false;
    }

private:

    static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
    {
        const auto * offset_it = offsets.begin();
        const UInt8 * begin = src;

        /// Process row by row so that invalid UTF-8 bytes in one row cannot
        /// influence the decoding (and word state) of the following rows.
        while (src < src_end)
        {
            const UInt8 * row_end = begin + *offset_it;
            chassert(row_end >= src);
            bool prev_alphanum = false;
            while (src < row_end)
                processCodePoint(src, row_end, dst, prev_alphanum);
            ++offset_it;
        }
    }
};

struct NameInitcapUTF8
{
    static constexpr auto name = "initcapUTF8";
};

using FunctionInitcapUTF8 = FunctionStringToString<InitcapUTF8Impl, NameInitcapUTF8>;

}

REGISTER_FUNCTION(InitcapUTF8)
{
    factory.registerFunction<FunctionInitcapUTF8>();
}

}
|
@ -1524,7 +1524,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
|
||||
uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
|
||||
if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
|
||||
{
|
||||
const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
|
||||
const auto & insert_structure = DatabaseCatalog::instance()
|
||||
.getTable(getInsertionTable(), shared_from_this())
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns()
|
||||
.getInsertable();
|
||||
DB::ColumnsDescription structure_hint;
|
||||
|
||||
bool use_columns_from_insert_query = true;
|
||||
|
@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
|
||||
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")
|
||||
|
||||
target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
|
||||
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
|
||||
|
@ -818,9 +818,10 @@ def test_start_stop_moves(start_cluster, name, engine):
|
||||
node1.query(f"SYSTEM STOP MOVES {name}")
|
||||
node1.query(f"SYSTEM STOP MERGES {name}")
|
||||
|
||||
first_part = None
|
||||
for i in range(5):
|
||||
data = [] # 5MB in total
|
||||
for i in range(5):
|
||||
for _ in range(5):
|
||||
data.append(get_random_string(1024 * 1024)) # 1MB row
|
||||
# jbod size is 40MB, so lets insert 5MB batch 7 times
|
||||
node1.query_with_retry(
|
||||
@ -829,7 +830,13 @@ def test_start_stop_moves(start_cluster, name, engine):
|
||||
)
|
||||
)
|
||||
|
||||
first_part = get_oldest_part(node1, name)
|
||||
# we cannot rely simply on modification time of part because it can be changed
|
||||
# by different background operations so we explicitly check after the first
|
||||
# part is inserted
|
||||
if i == 0:
|
||||
first_part = get_oldest_part(node1, name)
|
||||
|
||||
assert first_part is not None
|
||||
|
||||
used_disks = get_used_disks_for_table(node1, name)
|
||||
|
||||
|
@ -18,7 +18,7 @@ select distinct a from distinct_in_order settings max_block_size=10, max_threads
|
||||
|
||||
select '-- create table with not only primary key columns';
|
||||
drop table if exists distinct_in_order sync;
|
||||
create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b);
|
||||
create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select number % number, number % 5, number % 10 from numbers(1,1000000);
|
||||
|
||||
select '-- distinct with primary key prefix only';
|
||||
@ -59,16 +59,16 @@ drop table if exists distinct_in_order sync;
|
||||
|
||||
select '-- check that distinct in order returns the same result as ordinary distinct';
|
||||
drop table if exists distinct_cardinality_low sync;
|
||||
CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium);
|
||||
CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number % 1e3 FROM numbers_mt(1e4);
|
||||
|
||||
drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- check that distinct in order WITH order by returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
@ -76,9 +76,9 @@ drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
@ -86,9 +86,9 @@ drop table if exists distinct_in_order;
|
||||
drop table if exists ordinary_distinct;
|
||||
|
||||
select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
@ -102,12 +102,12 @@ drop table if exists sorting_key_contain_function;
|
||||
|
||||
select '-- bug 42185, distinct in order and empty sort description';
|
||||
select '-- distinct in order, sorting key tuple()';
|
||||
create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple();
|
||||
create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10);
|
||||
select distinct a from sorting_key_empty_tuple;
|
||||
|
||||
select '-- distinct in order, sorting key contains function';
|
||||
create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime));
|
||||
create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into sorting_key_contain_function values ('2000-01-01', 1);
|
||||
insert into sorting_key_contain_function values ('2000-01-01', 2);
|
||||
select distinct datetime from sorting_key_contain_function;
|
||||
|
@ -364,6 +364,8 @@ in
|
||||
inIgnoreSet
|
||||
indexHint
|
||||
indexOf
|
||||
initcap
|
||||
initcapUTF8
|
||||
initialQueryID
|
||||
initializeAggregation
|
||||
intDiv
|
||||
|
13
tests/queries/0_stateless/02810_initcap.reference
Normal file
13
tests/queries/0_stateless/02810_initcap.reference
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
Hello
|
||||
Hello
|
||||
Hello World
|
||||
Yeah, Well, I`M Gonna Go Build My Own Theme Park
|
||||
Crc32ieee Is The Best Function
|
||||
42ok
|
||||
|
||||
Hello
|
||||
Yeah, Well, I`M Gonna Go Build My Own Theme Park
|
||||
Привет, Как Дела?
|
||||
Ätsch, Bätsch
|
||||
We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ.
|
14
tests/queries/0_stateless/02810_initcap.sql
Normal file
14
tests/queries/0_stateless/02810_initcap.sql
Normal file
@ -0,0 +1,14 @@
|
||||
-- Tests for the ASCII initcap() function.
-- Empty string must stay empty.
select initcap('');
-- Already-capitalized input is unchanged.
select initcap('Hello');
select initcap('hello');
-- Each word gets its first letter uppercased.
select initcap('hello world');
-- Non-alphanumeric separators (comma, backtick) start a new word.
select initcap('yeah, well, i`m gonna go build my own theme park');
-- Letters after the word-initial one are lowercased, even in acronyms.
select initcap('CRC32IEEE is the best function');
-- A digit starts a word but is not case-converted.
select initcap('42oK');

-- Tests for the UTF-8-aware initcapUTF8() function.
select initcapUTF8('');
select initcapUTF8('Hello');
select initcapUTF8('yeah, well, i`m gonna go build my own theme park');
-- Cyrillic letters are case-converted per Unicode rules.
select initcapUTF8('привет, как дела?');
-- Letters with diacritics (same byte length in both cases) work.
select initcapUTF8('ätsch, bätsch');
-- Documented limitation: code points whose upper/lower case forms have
-- different UTF-8 byte lengths are left unchanged (e.g. ß/ẞ).
select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.');
|
@ -0,0 +1,9 @@
|
||||
-- Regression test: INSERT ... SELECT * from generateRandom() into a table
-- with ALIAS and MATERIALIZED columns must only fill the ordinary columns
-- (n1) and not fail on the non-insertable ones.
drop table if exists test;
create table test
(
    n1 UInt32,
    -- ALIAS column: computed on read, never stored or inserted.
    n2 UInt32 alias murmurHash3_32(n1),
    -- MATERIALIZED column: computed on insert from another column's value.
    n3 UInt32 materialized n2 + 1
)engine=MergeTree order by n1;
insert into test select * from generateRandom() limit 10;
drop table test;
|
@ -1582,6 +1582,8 @@ indexOf
|
||||
infi
|
||||
initialQueryID
|
||||
initializeAggregation
|
||||
initcap
|
||||
initcapUTF
|
||||
injective
|
||||
innogames
|
||||
inodes
|
||||
|
Loading…
Reference in New Issue
Block a user