mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into zvonand-issue-49290
This commit is contained in:
commit
4884022fda
@ -1255,3 +1255,15 @@ Result:
|
||||
│ A240 │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## initcap
|
||||
|
||||
Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
|
||||
|
||||
## initcapUTF8
|
||||
|
||||
Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
|
||||
|
||||
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
|
||||
|
@ -1113,3 +1113,14 @@ A text with tags .
|
||||
The content within <b>CDATA</b>
|
||||
Do Nothing for 2 Minutes 2:00
|
||||
```
|
||||
|
||||
## initcap {#initcap}
|
||||
|
||||
Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами.
|
||||
|
||||
## initcapUTF8 {#initcapUTF8}
|
||||
|
||||
Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8.
|
||||
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
|
||||
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
|
||||
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
|
||||
|
@ -8,7 +8,6 @@
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/Arena.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
|
@ -6223,7 +6223,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
const auto & insertion_table = scope_context->getInsertionTable();
|
||||
if (!insertion_table.empty())
|
||||
{
|
||||
const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
|
||||
const auto & insert_structure = DatabaseCatalog::instance()
|
||||
.getTable(insertion_table, scope_context)
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns()
|
||||
.getInsertable();
|
||||
DB::ColumnsDescription structure_hint;
|
||||
|
||||
bool use_columns_from_insert_query = true;
|
||||
|
@ -34,7 +34,7 @@ try
|
||||
DB::Memory<> memory;
|
||||
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
|
||||
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -34,7 +34,7 @@ try
|
||||
DB::Memory<> memory;
|
||||
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
|
||||
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -292,10 +292,10 @@ try
|
||||
|
||||
DB::Memory<> memory;
|
||||
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
|
||||
codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
|
||||
codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
|
||||
|
||||
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
|
||||
codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
|
||||
codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
|
||||
return 0;
|
||||
}
|
||||
catch (...)
|
||||
|
@ -24,7 +24,7 @@ try
|
||||
return 0;
|
||||
|
||||
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
|
||||
auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));
|
||||
auto codec = DB::getCompressionCodecLZ4(p->level);
|
||||
|
||||
size_t output_buffer_size = p->decompressed_size % 65536;
|
||||
size -= sizeof(AuxiliaryRandomData);
|
||||
@ -37,7 +37,7 @@ try
|
||||
DB::Memory<> memory;
|
||||
memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
|
||||
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
|
||||
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -28,7 +28,6 @@ namespace ErrorCodes
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int TOO_DEEP_RECURSION;
|
||||
}
|
||||
|
||||
constexpr Null NEGATIVE_INFINITY{Null::Value::NegativeInfinity};
|
||||
@ -42,13 +41,10 @@ using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;
|
||||
/// construct a Field of Array or a Tuple type. An alternative approach would be
|
||||
/// to construct both of these types from FieldVector, and have the caller
|
||||
/// specify the desired Field type explicitly.
|
||||
/// As the result stack overflow on destruction is possible
|
||||
/// and to avoid it we need to count the depth and have a threshold.
|
||||
#define DEFINE_FIELD_VECTOR(X) \
|
||||
struct X : public FieldVector \
|
||||
{ \
|
||||
using FieldVector::FieldVector; \
|
||||
uint8_t nested_field_depth = 0; \
|
||||
}
|
||||
|
||||
DEFINE_FIELD_VECTOR(Array);
|
||||
@ -65,7 +61,6 @@ using FieldMap = std::map<String, Field, std::less<>, AllocatorWithMemoryTrackin
|
||||
struct X : public FieldMap \
|
||||
{ \
|
||||
using FieldMap::FieldMap; \
|
||||
uint8_t nested_field_depth = 0; \
|
||||
}
|
||||
|
||||
DEFINE_FIELD_MAP(Object);
|
||||
@ -296,12 +291,6 @@ decltype(auto) castToNearestFieldType(T && x)
|
||||
*/
|
||||
#define DBMS_MIN_FIELD_SIZE 32
|
||||
|
||||
/// Note: uint8_t is used for storing depth value.
|
||||
#if defined(SANITIZER) || !defined(NDEBUG)
|
||||
#define DBMS_MAX_NESTED_FIELD_DEPTH 64
|
||||
#else
|
||||
#define DBMS_MAX_NESTED_FIELD_DEPTH 255
|
||||
#endif
|
||||
|
||||
/** Discriminated union of several types.
|
||||
* Made for replacement of `boost::variant`
|
||||
@ -682,49 +671,6 @@ private:
|
||||
|
||||
Types::Which which;
|
||||
|
||||
/// StorageType and Original are the same for Array, Tuple, Map, Object
|
||||
template <typename StorageType, typename Original>
|
||||
uint8_t calculateAndCheckFieldDepth(Original && x)
|
||||
{
|
||||
uint8_t result = 0;
|
||||
|
||||
if constexpr (std::is_same_v<StorageType, Array>
|
||||
|| std::is_same_v<StorageType, Tuple>
|
||||
|| std::is_same_v<StorageType, Map>
|
||||
|| std::is_same_v<StorageType, Object>)
|
||||
{
|
||||
result = x.nested_field_depth;
|
||||
|
||||
auto get_depth = [](const Field & elem)
|
||||
{
|
||||
switch (elem.which)
|
||||
{
|
||||
case Types::Array:
|
||||
return elem.template get<Array>().nested_field_depth;
|
||||
case Types::Tuple:
|
||||
return elem.template get<Tuple>().nested_field_depth;
|
||||
case Types::Map:
|
||||
return elem.template get<Map>().nested_field_depth;
|
||||
case Types::Object:
|
||||
return elem.template get<Object>().nested_field_depth;
|
||||
default:
|
||||
return static_cast<uint8_t>(0);
|
||||
}
|
||||
};
|
||||
|
||||
if constexpr (std::is_same_v<StorageType, Object>)
|
||||
for (auto & [_, value] : x)
|
||||
result = std::max(get_depth(value), result);
|
||||
else
|
||||
for (auto & value : x)
|
||||
result = std::max(get_depth(value), result);
|
||||
}
|
||||
|
||||
if (result >= DBMS_MAX_NESTED_FIELD_DEPTH)
|
||||
throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Assuming there was no allocated state or it was deallocated (see destroy).
|
||||
template <typename T>
|
||||
@ -738,17 +684,7 @@ private:
|
||||
// we must initialize the entire wide stored type, and not just the
|
||||
// nominal type.
|
||||
using StorageType = NearestFieldType<UnqualifiedType>;
|
||||
|
||||
/// Incrementing the depth since we create a new Field.
|
||||
auto depth = calculateAndCheckFieldDepth<StorageType>(x);
|
||||
new (&storage) StorageType(std::forward<T>(x));
|
||||
|
||||
if constexpr (std::is_same_v<StorageType, Array>
|
||||
|| std::is_same_v<StorageType, Tuple>
|
||||
|| std::is_same_v<StorageType, Map>
|
||||
|| std::is_same_v<StorageType, Object>)
|
||||
reinterpret_cast<StorageType *>(&storage)->nested_field_depth = depth + 1;
|
||||
|
||||
which = TypeToEnum<UnqualifiedType>::value;
|
||||
}
|
||||
|
||||
@ -845,7 +781,7 @@ private:
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ALWAYS_INLINE void destroy()
|
||||
void destroy()
|
||||
{
|
||||
T * MAY_ALIAS ptr = reinterpret_cast<T*>(&storage);
|
||||
ptr->~T();
|
||||
|
@ -62,7 +62,7 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const
|
||||
}
|
||||
else
|
||||
{
|
||||
ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth);
|
||||
ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth);
|
||||
}
|
||||
|
||||
return getImpl<nullptr_on_error>(ast);
|
||||
|
@ -1521,10 +1521,8 @@ struct Transformer
|
||||
if constexpr (std::is_same_v<Additions, DateTimeAccurateConvertStrategyAdditions>
|
||||
|| std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
|
||||
{
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
|
||||
bool is_valid_input = vec_from[i] >= 0 && vec_from[i] <= 0xFFFFFFFFL;
|
||||
# pragma clang diagnostic pop
|
||||
|
||||
if (!is_valid_input)
|
||||
{
|
||||
if constexpr (std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
|
||||
|
@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl
|
||||
}
|
||||
else
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
size_t src_sequence_length = UTF8::seqLength(*src);
|
||||
/// In case partial buffer was passed (due to SSE optimization)
|
||||
/// we cannot convert it with current src_end, but we may have more
|
||||
|
66
src/Functions/initcap.cpp
Normal file
66
src/Functions/initcap.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
struct InitcapImpl
|
||||
{
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (data.empty())
|
||||
return;
|
||||
res_data.resize(data.size());
|
||||
res_offsets.assign(offsets);
|
||||
array(data.data(), data.data() + data.size(), res_data.data());
|
||||
}
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
|
||||
{
|
||||
res_data.resize(data.size());
|
||||
array(data.data(), data.data() + data.size(), res_data.data());
|
||||
}
|
||||
|
||||
private:
|
||||
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
|
||||
{
|
||||
bool prev_alphanum = false;
|
||||
|
||||
for (; src < src_end; ++src, ++dst)
|
||||
{
|
||||
char c = *src;
|
||||
bool alphanum = isAlphaNumericASCII(c);
|
||||
if (alphanum && !prev_alphanum)
|
||||
if (isAlphaASCII(c))
|
||||
*dst = toUpperIfAlphaASCII(c);
|
||||
else
|
||||
*dst = c;
|
||||
else if (isAlphaASCII(c))
|
||||
*dst = toLowerIfAlphaASCII(c);
|
||||
else
|
||||
*dst = c;
|
||||
prev_alphanum = alphanum;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct NameInitcap
|
||||
{
|
||||
static constexpr auto name = "initcap";
|
||||
};
|
||||
using FunctionInitcap = FunctionStringToString<InitcapImpl, NameInitcap>;
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(Initcap)
|
||||
{
|
||||
factory.registerFunction<FunctionInitcap>({}, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
114
src/Functions/initcapUTF8.cpp
Normal file
114
src/Functions/initcapUTF8.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/LowerUpperUTF8Impl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Poco/Unicode.h>
|
||||
|
||||
|
||||
namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

namespace
{

/// UTF-8-aware implementation of initcapUTF8(): uppercases the first
/// alphabetic code point of each word and lowercases the rest, where a word
/// is a run of alphanumeric code points (per Poco::Unicode classification).
/// Code points whose case-converted form has a different UTF-8 byte length
/// are passed through byte-by-byte instead (see processCodePoint), so the
/// result may be incorrect for such characters (e.g. ß/ẞ).
struct InitcapUTF8Impl
{
    static void vector(
        const ColumnString::Chars & data,
        const ColumnString::Offsets & offsets,
        ColumnString::Chars & res_data,
        ColumnString::Offsets & res_offsets)
    {
        if (data.empty())
            return;
        /// Output has exactly the same byte length as the input: conversions
        /// that would change the sequence length are rejected below, so the
        /// input offsets can be reused as-is.
        res_data.resize(data.size());
        res_offsets.assign(offsets);
        array(data.data(), data.data() + data.size(), offsets, res_data.data());
    }

    /// FixedString input is not supported: case conversion could change byte
    /// lengths and row boundaries are needed to isolate invalid UTF-8.
    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
    {
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
    }

    /// Convert one code point, advancing src and dst past the bytes consumed
    /// and produced. prev_alphanum tracks whether the previous code point was
    /// alphanumeric (i.e. whether we are inside a word).
    /// Fallback behavior: if the bytes do not decode as UTF-8, the converted
    /// code point is non-positive, or the converted sequence length differs
    /// from the source sequence length, a single raw byte is copied and the
    /// word state is reset. NOTE(review): in that last case
    /// convertCodePointToUTF8 has already written into dst; those bytes are
    /// overwritten by subsequent single-byte copies since dst advances by 1 —
    /// presumably intended, but worth confirming.
    static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum)
    {
        size_t src_sequence_length = UTF8::seqLength(*src);
        auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);

        if (src_code_point)
        {
            bool alpha = Poco::Unicode::isAlpha(*src_code_point);
            bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point);

            int dst_code_point = *src_code_point;
            if (alphanum && !prev_alphanum)
            {
                /// First character of a word: uppercase if it is a letter
                /// (digits start a word but are left unchanged).
                if (alpha)
                    dst_code_point = Poco::Unicode::toUpper(*src_code_point);
            }
            else if (alpha)
            {
                /// Inside a word (or a letter outside word start position):
                /// lowercase it.
                dst_code_point = Poco::Unicode::toLower(*src_code_point);
            }
            prev_alphanum = alphanum;
            if (dst_code_point > 0)
            {
                size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
                assert(dst_sequence_length <= 4);

                /// Only accept the conversion when it keeps the byte length,
                /// so offsets computed from the input remain valid.
                if (dst_sequence_length == src_sequence_length)
                {
                    src += dst_sequence_length;
                    dst += dst_sequence_length;
                    return;
                }
            }
        }

        /// Fallback: copy one raw byte and treat it as a word boundary.
        *dst = *src;
        ++dst;
        ++src;
        prev_alphanum = false;
    }

private:

    static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
    {
        const auto * offset_it = offsets.begin();
        const UInt8 * begin = src;

        /// Process row by row so that invalid UTF-8 bytes in one row cannot
        /// influence the decoding (and word state) of the following rows.
        while (src < src_end)
        {
            const UInt8 * row_end = begin + *offset_it;
            chassert(row_end >= src);
            bool prev_alphanum = false;
            while (src < row_end)
                processCodePoint(src, row_end, dst, prev_alphanum);
            ++offset_it;
        }
    }
};

struct NameInitcapUTF8
{
    static constexpr auto name = "initcapUTF8";
};

using FunctionInitcapUTF8 = FunctionStringToString<InitcapUTF8Impl, NameInitcapUTF8>;

}

REGISTER_FUNCTION(InitcapUTF8)
{
    factory.registerFunction<FunctionInitcapUTF8>();
}

}
|
@ -1524,7 +1524,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
|
||||
uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
|
||||
if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
|
||||
{
|
||||
const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
|
||||
const auto & insert_structure = DatabaseCatalog::instance()
|
||||
.getTable(getInsertionTable(), shared_from_this())
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns()
|
||||
.getInsertable();
|
||||
DB::ColumnsDescription structure_hint;
|
||||
|
||||
bool use_columns_from_insert_query = true;
|
||||
|
@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
|
||||
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")
|
||||
|
||||
target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
|
||||
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
|
||||
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
|
||||
|
@ -818,9 +818,10 @@ def test_start_stop_moves(start_cluster, name, engine):
|
||||
node1.query(f"SYSTEM STOP MOVES {name}")
|
||||
node1.query(f"SYSTEM STOP MERGES {name}")
|
||||
|
||||
first_part = None
|
||||
for i in range(5):
|
||||
data = [] # 5MB in total
|
||||
for i in range(5):
|
||||
for _ in range(5):
|
||||
data.append(get_random_string(1024 * 1024)) # 1MB row
|
||||
# jbod size is 40MB, so lets insert 5MB batch 7 times
|
||||
node1.query_with_retry(
|
||||
@ -829,7 +830,13 @@ def test_start_stop_moves(start_cluster, name, engine):
|
||||
)
|
||||
)
|
||||
|
||||
first_part = get_oldest_part(node1, name)
|
||||
# we cannot rely simply on modification time of part because it can be changed
|
||||
# by different background operations so we explicitly check after the first
|
||||
# part is inserted
|
||||
if i == 0:
|
||||
first_part = get_oldest_part(node1, name)
|
||||
|
||||
assert first_part is not None
|
||||
|
||||
used_disks = get_used_disks_for_table(node1, name)
|
||||
|
||||
|
@ -18,7 +18,7 @@ select distinct a from distinct_in_order settings max_block_size=10, max_threads
|
||||
|
||||
select '-- create table with not only primary key columns';
|
||||
drop table if exists distinct_in_order sync;
|
||||
create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b);
|
||||
create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select number % number, number % 5, number % 10 from numbers(1,1000000);
|
||||
|
||||
select '-- distinct with primary key prefix only';
|
||||
@ -59,16 +59,16 @@ drop table if exists distinct_in_order sync;
|
||||
|
||||
select '-- check that distinct in order returns the same result as ordinary distinct';
|
||||
drop table if exists distinct_cardinality_low sync;
|
||||
CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium);
|
||||
CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number % 1e3 FROM numbers_mt(1e4);
|
||||
|
||||
drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- check that distinct in order WITH order by returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
@ -76,9 +76,9 @@ drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
@ -86,9 +86,9 @@ drop table if exists distinct_in_order;
|
||||
drop table if exists ordinary_distinct;
|
||||
|
||||
select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
@ -102,12 +102,12 @@ drop table if exists sorting_key_contain_function;
|
||||
|
||||
select '-- bug 42185, distinct in order and empty sort description';
|
||||
select '-- distinct in order, sorting key tuple()';
|
||||
create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple();
|
||||
create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10);
|
||||
select distinct a from sorting_key_empty_tuple;
|
||||
|
||||
select '-- distinct in order, sorting key contains function';
|
||||
create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime));
|
||||
create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
|
||||
insert into sorting_key_contain_function values ('2000-01-01', 1);
|
||||
insert into sorting_key_contain_function values ('2000-01-01', 2);
|
||||
select distinct datetime from sorting_key_contain_function;
|
||||
|
@ -364,6 +364,8 @@ in
|
||||
inIgnoreSet
|
||||
indexHint
|
||||
indexOf
|
||||
initcap
|
||||
initcapUTF8
|
||||
initialQueryID
|
||||
initializeAggregation
|
||||
intDiv
|
||||
|
13
tests/queries/0_stateless/02810_initcap.reference
Normal file
13
tests/queries/0_stateless/02810_initcap.reference
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
Hello
|
||||
Hello
|
||||
Hello World
|
||||
Yeah, Well, I`M Gonna Go Build My Own Theme Park
|
||||
Crc32ieee Is The Best Function
|
||||
42ok
|
||||
|
||||
Hello
|
||||
Yeah, Well, I`M Gonna Go Build My Own Theme Park
|
||||
Привет, Как Дела?
|
||||
Ätsch, Bätsch
|
||||
We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ.
|
14
tests/queries/0_stateless/02810_initcap.sql
Normal file
14
tests/queries/0_stateless/02810_initcap.sql
Normal file
@ -0,0 +1,14 @@
|
||||
-- Tests for the ASCII initcap() function.
-- Empty string must stay empty.
select initcap('');
-- Already-capitalized input is unchanged.
select initcap('Hello');
select initcap('hello');
-- Each word gets its first letter uppercased.
select initcap('hello world');
-- Non-alphanumeric separators (comma, backtick) start a new word.
select initcap('yeah, well, i`m gonna go build my own theme park');
-- Letters after the word-initial one are lowercased, even in acronyms.
select initcap('CRC32IEEE is the best function');
-- A digit starts a word but is not case-converted.
select initcap('42oK');

-- Tests for the UTF-8-aware initcapUTF8() function.
select initcapUTF8('');
select initcapUTF8('Hello');
select initcapUTF8('yeah, well, i`m gonna go build my own theme park');
-- Cyrillic letters are case-converted per Unicode rules.
select initcapUTF8('привет, как дела?');
-- Letters with diacritics (same byte length in both cases) work.
select initcapUTF8('ätsch, bätsch');
-- Documented limitation: code points whose upper/lower case forms have
-- different UTF-8 byte lengths are left unchanged (e.g. ß/ẞ).
select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.');
|
@ -0,0 +1,9 @@
|
||||
-- Regression test: INSERT ... SELECT * from generateRandom() into a table
-- with ALIAS and MATERIALIZED columns must only fill the ordinary columns
-- (n1) and not fail on the non-insertable ones.
drop table if exists test;
create table test
(
    n1 UInt32,
    -- ALIAS column: computed on read, never stored or inserted.
    n2 UInt32 alias murmurHash3_32(n1),
    -- MATERIALIZED column: computed on insert from another column's value.
    n3 UInt32 materialized n2 + 1
)engine=MergeTree order by n1;
insert into test select * from generateRandom() limit 10;
drop table test;
|
@ -1582,6 +1582,8 @@ indexOf
|
||||
infi
|
||||
initialQueryID
|
||||
initializeAggregation
|
||||
initcap
|
||||
initcapUTF
|
||||
injective
|
||||
innogames
|
||||
inodes
|
||||
|
Loading…
Reference in New Issue
Block a user