Merge branch 'master' into no-keep-context-lock-while-calculating-access

Vitaly Baranov 2023-07-18 07:35:39 +02:00 committed by GitHub
commit 4963cfba39
33 changed files with 386 additions and 159 deletions

View File

@ -1,43 +1,38 @@
# Usage:
# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes
# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
# include (cmake/limit_jobs.cmake)
# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive.
#
# Usage from CMake:
# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabytes
# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabytes
# include (cmake/limit_jobs.cmake)
#
# (bigger values mean fewer jobs)
cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under FreeBSD
cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY)
cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES)
# 1 if not set
option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "")
# Set to disable the automatic job-limiting
option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF)
option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF)
# 1 if not set
option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "")
if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY)
math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY})
if (NOT PARALLEL_COMPILE_JOBS)
set (PARALLEL_COMPILE_JOBS 1)
endif ()
if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set (PARALLEL_COMPILE_JOBS_LESS TRUE)
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
endif()
endif ()
if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
endif ()
if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY)
math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY})
if (NOT PARALLEL_LINK_JOBS)
set (PARALLEL_LINK_JOBS 1)
endif ()
if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set (PARALLEL_LINK_JOBS_LESS TRUE)
if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
endif()
endif ()
@ -52,20 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
set (PARALLEL_LINK_JOBS 2)
endif()
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES))
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
endif ()
if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS})
endif ()
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
message(STATUS
"${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory.
Limiting concurrent linker jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
if (PARALLEL_COMPILE_JOBS_LESS)
message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
endif()
if (PARALLEL_LINK_JOBS_LESS)
message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
endif()
endif ()

contrib/cctz vendored

@ -1 +1 @@
Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c

View File

@ -0,0 +1,32 @@
---
slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg
sidebar_position: 110
---
# array_concat_agg
Alias of `groupArrayArray`. The function is case-insensitive.
**Example**
```text
SELECT *
FROM t

┌─a───────┐
│ [1,2,3] │
│ [4,5]   │
│ [6]     │
└─────────┘
```
Query:
```sql
SELECT array_concat_agg(a) AS a
FROM t
```
Result:
```text
┌─a─────────────┐
│ [1,2,3,4,5,6] │
└───────────────┘
```
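Because the alias is registered case-insensitively, any spelling resolves to the same function; the following query mirrors the tests added later in this commit:
```sql
SELECT ArrAy_cOncAt_aGg(a) AS a
FROM t
-- [1,2,3,4,5,6]
```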

View File

@ -1255,3 +1255,15 @@ Result:
│ A240 │
└──────────────────┘
```
## initcap
Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
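**Example** (inputs and expected outputs mirror the tests added later in this commit):
```sql
SELECT initcap('yeah, well, i`m gonna go build my own theme park');
-- Yeah, Well, I`M Gonna Go Build My Own Theme Park
SELECT initcap('42oK');
-- 42ok
```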
## initcapUTF8
Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
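**Example** (taken from the tests added later in this commit):
```sql
SELECT initcapUTF8('привет, как дела?');
-- Привет, Как Дела?
SELECT initcapUTF8('ätsch, bätsch');
-- Ätsch, Bätsch
```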

View File

@ -1113,3 +1113,14 @@ A text with tags .
The content within <b>CDATA</b>
Do Nothing for 2 Minutes 2:00 &nbsp;
```
## initcap {#initcap}
Converts the first letter of each word in the string to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by any other characters.
## initcapUTF8 {#initcapUTF8}
Like [initcap](#initcap), assuming that the string contains bytes representing UTF-8 encoded text.
Does not take the language into account, e.g. for Turkish the result might not be exactly correct.
If the length of the UTF-8 byte sequence differs between the upper and lower case of a code point, the result may be incorrect for that code point.
If the string contains a byte sequence that is not valid UTF-8, the behavior is undefined.

View File

@ -222,7 +222,6 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet(
: nullptr;
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
{
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)

View File

@ -126,6 +126,7 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::CaseInsensitive);
factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray<true>, properties });
}
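`registerAliasUnchecked` is used here presumably because `groupArrayArray` is not a directly registered name: it is resolved at lookup time as `groupArray` plus the `-Array` combinator, which the checked `registerAlias` cannot find in its creator maps. A small SQL sketch of the resulting equivalence, assuming the table `t` from the tests added in this commit:
```sql
-- Both sides flatten the grouped arrays into a single array.
SELECT groupArrayArray(a) = array_concat_agg(a) FROM t;
-- 1
```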

View File

@ -8,7 +8,6 @@
#include <Common/MemoryTracker.h>
#include <Common/CurrentThread.h>
#include <Common/Arena.h>
#include <Interpreters/Context.h>

View File

@ -6223,7 +6223,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
const auto & insertion_table = scope_context->getInsertionTable();
if (!insertion_table.empty())
{
const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
const auto & insert_structure = DatabaseCatalog::instance()
.getTable(insertion_table, scope_context)
->getInMemoryMetadataPtr()
->getColumns()
.getInsertable();
DB::ColumnsDescription structure_hint;
bool use_columns_from_insert_query = true;

View File

@ -52,35 +52,38 @@ public:
{
const auto & creator_map = getMap();
const auto & case_insensitive_creator_map = getCaseInsensitiveMap();
const String factory_name = getFactoryName();
String real_dict_name;
if (creator_map.count(real_name))
real_dict_name = real_name;
else if (auto real_name_lowercase = Poco::toLower(real_name); case_insensitive_creator_map.count(real_name_lowercase))
real_dict_name = real_name_lowercase;
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: can't create alias '{}', the real name '{}' is not registered",
factory_name, alias_name, real_name);
auto real_name_lowercase = Poco::toLower(real_name);
if (!creator_map.contains(real_name) && !case_insensitive_creator_map.contains(real_name_lowercase))
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"{}: can't create alias '{}', the real name '{}' is not registered",
getFactoryName(),
alias_name,
real_name);
registerAliasUnchecked(alias_name, real_name, case_sensitiveness);
}
/// The caller must ensure that real_name actually exists when calling this function directly.
void registerAliasUnchecked(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive)
{
String alias_name_lowercase = Poco::toLower(alias_name);
if (creator_map.count(alias_name) || case_insensitive_creator_map.count(alias_name_lowercase))
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: the alias name '{}' is already registered as real name",
factory_name, alias_name);
String real_name_lowercase = Poco::toLower(real_name);
const String factory_name = getFactoryName();
if (case_sensitiveness == CaseInsensitive)
{
if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_dict_name).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique",
factory_name, alias_name);
if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_name).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique", factory_name, alias_name);
case_insensitive_name_mapping[alias_name_lowercase] = real_name;
}
if (!aliases.emplace(alias_name, real_dict_name).second)
if (!aliases.emplace(alias_name, real_name).second)
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: alias name '{}' is not unique", factory_name, alias_name);
}
std::vector<String> getAllRegisteredNames() const override
{
std::vector<String> result;
@ -93,7 +96,7 @@ public:
bool isCaseInsensitive(const String & name) const
{
String name_lowercase = Poco::toLower(name);
return getCaseInsensitiveMap().count(name_lowercase) || case_insensitive_aliases.count(name_lowercase);
return getCaseInsensitiveMap().contains(name_lowercase) || case_insensitive_aliases.contains(name_lowercase);
}
const String & aliasTo(const String & name) const
@ -106,14 +109,11 @@ public:
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: name '{}' is not alias", getFactoryName(), name);
}
bool isAlias(const String & name) const
{
return aliases.count(name) || case_insensitive_aliases.contains(name);
}
bool isAlias(const String & name) const { return aliases.contains(name) || case_insensitive_aliases.contains(name); }
bool hasNameOrAlias(const String & name) const
{
return getMap().count(name) || getCaseInsensitiveMap().count(name) || isAlias(name);
return getMap().contains(name) || getCaseInsensitiveMap().contains(name) || isAlias(name);
}
/// Return the canonical name (the name used in registration) if it's different from `name`.
@ -129,7 +129,7 @@ public:
private:
using InnerMap = std::unordered_map<String, Value>; // name -> creator
using AliasMap = std::unordered_map<String, String>; // alias -> original type
using AliasMap = std::unordered_map<String, String>; // alias -> original name
virtual const InnerMap & getMap() const = 0;
virtual const InnerMap & getCaseInsensitiveMap() const = 0;

View File

@ -25,8 +25,6 @@ void Pool::Entry::incrementRefCount()
/// First reference, initialize thread
if (data->ref_count.fetch_add(1) == 0)
mysql_thread_init();
chassert(!data->removed_from_pool);
}
@ -43,7 +41,10 @@ void Pool::Entry::decrementRefCount()
/// In Pool::Entry::disconnect() we remove the connection from the pool's list of connections.
/// So now we must deallocate the memory.
if (data->removed_from_pool)
{
data->conn.disconnect();
::delete data;
}
}
}
@ -230,8 +231,6 @@ void Pool::removeConnection(Connection* connection)
std::lock_guard lock(mutex);
if (connection)
{
if (!connection->removed_from_pool)
connection->conn.disconnect();
connections.remove(connection);
connection->removed_from_pool = true;
}
@ -240,6 +239,7 @@ void Pool::removeConnection(Connection* connection)
void Pool::Entry::disconnect()
{
// Remove the Entry from the Pool. Actual disconnection is delayed until refcount == 0.
pool->removeConnection(data);
}

View File

@ -34,7 +34,7 @@ try
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}

View File

@ -34,7 +34,7 @@ try
DB::Memory<> memory;
memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}

View File

@ -292,10 +292,10 @@ try
DB::Memory<> memory;
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
return 0;
}
catch (...)

View File

@ -24,7 +24,7 @@ try
return 0;
const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));
auto codec = DB::getCompressionCodecLZ4(p->level);
size_t output_buffer_size = p->decompressed_size % 65536;
size -= sizeof(AuxiliaryRandomData);
@ -37,7 +37,7 @@ try
DB::Memory<> memory;
memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);
codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);
return 0;
}

View File

@ -28,7 +28,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_DEEP_RECURSION;
}
constexpr Null NEGATIVE_INFINITY{Null::Value::NegativeInfinity};
@ -42,13 +41,10 @@ using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;
/// construct a Field of Array or a Tuple type. An alternative approach would be
/// to construct both of these types from FieldVector, and have the caller
/// specify the desired Field type explicitly.
/// As a result, stack overflow on destruction is possible;
/// to avoid it we count the nesting depth and enforce a threshold.
#define DEFINE_FIELD_VECTOR(X) \
struct X : public FieldVector \
{ \
using FieldVector::FieldVector; \
uint8_t nested_field_depth = 0; \
}
DEFINE_FIELD_VECTOR(Array);
@ -65,7 +61,6 @@ using FieldMap = std::map<String, Field, std::less<>, AllocatorWithMemoryTrackin
struct X : public FieldMap \
{ \
using FieldMap::FieldMap; \
uint8_t nested_field_depth = 0; \
}
DEFINE_FIELD_MAP(Object);
@ -296,12 +291,6 @@ decltype(auto) castToNearestFieldType(T && x)
*/
#define DBMS_MIN_FIELD_SIZE 32
/// Note: uint8_t is used for storing depth value.
#if defined(SANITIZER) || !defined(NDEBUG)
#define DBMS_MAX_NESTED_FIELD_DEPTH 64
#else
#define DBMS_MAX_NESTED_FIELD_DEPTH 255
#endif
/** Discriminated union of several types.
* Made as a replacement for `boost::variant`
@ -682,49 +671,6 @@ private:
Types::Which which;
/// StorageType and Original are the same for Array, Tuple, Map, Object
template <typename StorageType, typename Original>
uint8_t calculateAndCheckFieldDepth(Original && x)
{
uint8_t result = 0;
if constexpr (std::is_same_v<StorageType, Array>
|| std::is_same_v<StorageType, Tuple>
|| std::is_same_v<StorageType, Map>
|| std::is_same_v<StorageType, Object>)
{
result = x.nested_field_depth;
auto get_depth = [](const Field & elem)
{
switch (elem.which)
{
case Types::Array:
return elem.template get<Array>().nested_field_depth;
case Types::Tuple:
return elem.template get<Tuple>().nested_field_depth;
case Types::Map:
return elem.template get<Map>().nested_field_depth;
case Types::Object:
return elem.template get<Object>().nested_field_depth;
default:
return static_cast<uint8_t>(0);
}
};
if constexpr (std::is_same_v<StorageType, Object>)
for (auto & [_, value] : x)
result = std::max(get_depth(value), result);
else
for (auto & value : x)
result = std::max(get_depth(value), result);
}
if (result >= DBMS_MAX_NESTED_FIELD_DEPTH)
throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field");
return result;
}
/// Assuming there was no allocated state or it was deallocated (see destroy).
template <typename T>
@ -738,17 +684,7 @@ private:
// we must initialize the entire wide stored type, and not just the
// nominal type.
using StorageType = NearestFieldType<UnqualifiedType>;
/// Incrementing the depth since we create a new Field.
auto depth = calculateAndCheckFieldDepth<StorageType>(x);
new (&storage) StorageType(std::forward<T>(x));
if constexpr (std::is_same_v<StorageType, Array>
|| std::is_same_v<StorageType, Tuple>
|| std::is_same_v<StorageType, Map>
|| std::is_same_v<StorageType, Object>)
reinterpret_cast<StorageType *>(&storage)->nested_field_depth = depth + 1;
which = TypeToEnum<UnqualifiedType>::value;
}
@ -845,7 +781,7 @@ private:
}
template <typename T>
ALWAYS_INLINE void destroy()
void destroy()
{
T * MAY_ALIAS ptr = reinterpret_cast<T*>(&storage);
ptr->~T();

View File

@ -62,7 +62,7 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const
}
else
{
ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth);
ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth);
}
return getImpl<nullptr_on_error>(ast);

View File

@ -1521,10 +1521,8 @@ struct Transformer
if constexpr (std::is_same_v<Additions, DateTimeAccurateConvertStrategyAdditions>
|| std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
{
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
bool is_valid_input = vec_from[i] >= 0 && vec_from[i] <= 0xFFFFFFFFL;
# pragma clang diagnostic pop
if (!is_valid_input)
{
if constexpr (std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)

View File

@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl
}
else
{
static const Poco::UTF8Encoding utf8;
size_t src_sequence_length = UTF8::seqLength(*src);
/// In case partial buffer was passed (due to SSE optimization)
/// we cannot convert it with current src_end, but we may have more

src/Functions/initcap.cpp Normal file
View File

@ -0,0 +1,66 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringToString.h>
#include <Common/StringUtils/StringUtils.h>
namespace DB
{
namespace
{
struct InitcapImpl
{
static void vector(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
if (data.empty())
return;
res_data.resize(data.size());
res_offsets.assign(offsets);
array(data.data(), data.data() + data.size(), res_data.data());
}
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
{
res_data.resize(data.size());
array(data.data(), data.data() + data.size(), res_data.data());
}
private:
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
{
bool prev_alphanum = false;
for (; src < src_end; ++src, ++dst)
{
char c = *src;
bool alphanum = isAlphaNumericASCII(c);
if (alphanum && !prev_alphanum)
if (isAlphaASCII(c))
*dst = toUpperIfAlphaASCII(c);
else
*dst = c;
else if (isAlphaASCII(c))
*dst = toLowerIfAlphaASCII(c);
else
*dst = c;
prev_alphanum = alphanum;
}
}
};
struct NameInitcap
{
static constexpr auto name = "initcap";
};
using FunctionInitcap = FunctionStringToString<InitcapImpl, NameInitcap>;
}
REGISTER_FUNCTION(Initcap)
{
factory.registerFunction<FunctionInitcap>({}, FunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,114 @@
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionStringToString.h>
#include <Functions/LowerUpperUTF8Impl.h>
#include <Functions/FunctionFactory.h>
#include <Poco/Unicode.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
namespace
{
struct InitcapUTF8Impl
{
static void vector(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
if (data.empty())
return;
res_data.resize(data.size());
res_offsets.assign(offsets);
array(data.data(), data.data() + data.size(), offsets, res_data.data());
}
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
}
static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum)
{
size_t src_sequence_length = UTF8::seqLength(*src);
auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
if (src_code_point)
{
bool alpha = Poco::Unicode::isAlpha(*src_code_point);
bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point);
int dst_code_point = *src_code_point;
if (alphanum && !prev_alphanum)
{
if (alpha)
dst_code_point = Poco::Unicode::toUpper(*src_code_point);
}
else if (alpha)
{
dst_code_point = Poco::Unicode::toLower(*src_code_point);
}
prev_alphanum = alphanum;
if (dst_code_point > 0)
{
size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
assert(dst_sequence_length <= 4);
if (dst_sequence_length == src_sequence_length)
{
src += dst_sequence_length;
dst += dst_sequence_length;
return;
}
}
}
*dst = *src;
++dst;
++src;
prev_alphanum = false;
}
private:
static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
{
const auto * offset_it = offsets.begin();
const UInt8 * begin = src;
/// Process the input row by row, so that invalid UTF-8 symbols in one row cannot affect another row.
while (src < src_end)
{
const UInt8 * row_end = begin + *offset_it;
chassert(row_end >= src);
bool prev_alphanum = false;
while (src < row_end)
processCodePoint(src, row_end, dst, prev_alphanum);
++offset_it;
}
}
};
struct NameInitcapUTF8
{
static constexpr auto name = "initcapUTF8";
};
using FunctionInitcapUTF8 = FunctionStringToString<InitcapUTF8Impl, NameInitcapUTF8>;
}
REGISTER_FUNCTION(InitcapUTF8)
{
factory.registerFunction<FunctionInitcapUTF8>();
}
}

View File

@ -1574,7 +1574,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
{
const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
const auto & insert_structure = DatabaseCatalog::instance()
.getTable(getInsertionTable(), shared_from_this())
->getInMemoryMetadataPtr()
->getColumns()
.getInsertable();
DB::ColumnsDescription structure_hint;
bool use_columns_from_insert_query = true;

View File

@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")
target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})

View File

@ -818,9 +818,10 @@ def test_start_stop_moves(start_cluster, name, engine):
node1.query(f"SYSTEM STOP MOVES {name}")
node1.query(f"SYSTEM STOP MERGES {name}")
first_part = None
for i in range(5):
data = [] # 5MB in total
for i in range(5):
for _ in range(5):
data.append(get_random_string(1024 * 1024)) # 1MB row
# JBOD size is 40MB, so let's insert a 5MB batch 7 times
node1.query_with_retry(
@ -829,7 +830,13 @@ def test_start_stop_moves(start_cluster, name, engine):
)
)
first_part = get_oldest_part(node1, name)
# we cannot rely simply on the modification time of the part because it can be changed
# by different background operations, so we explicitly remember the oldest part right
# after the first insert
if i == 0:
first_part = get_oldest_part(node1, name)
assert first_part is not None
used_disks = get_used_disks_for_table(node1, name)

View File

@ -18,7 +18,7 @@ select distinct a from distinct_in_order settings max_block_size=10, max_threads
select '-- create table with not only primary key columns';
drop table if exists distinct_in_order sync;
create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b);
create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into distinct_in_order select number % number, number % 5, number % 10 from numbers(1,1000000);
select '-- distinct with primary key prefix only';
@ -59,16 +59,16 @@ drop table if exists distinct_in_order sync;
select '-- check that distinct in order returns the same result as ordinary distinct';
drop table if exists distinct_cardinality_low sync;
CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium);
CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number % 1e3 FROM numbers_mt(1e4);
drop table if exists distinct_in_order sync;
drop table if exists ordinary_distinct sync;
select '-- check that distinct in order WITH order by returns the same result as ordinary distinct';
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1;
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0;
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
@ -76,9 +76,9 @@ drop table if exists distinct_in_order sync;
drop table if exists ordinary_distinct sync;
select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct';
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1;
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0;
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
@ -86,9 +86,9 @@ drop table if exists distinct_in_order;
drop table if exists ordinary_distinct;
select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct';
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1;
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0;
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
@ -102,12 +102,12 @@ drop table if exists sorting_key_contain_function;
select '-- bug 42185, distinct in order and empty sort description';
select '-- distinct in order, sorting key tuple()';
create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple();
create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10);
select distinct a from sorting_key_empty_tuple;
select '-- distinct in order, sorting key contains function';
create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime));
create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into sorting_key_contain_function values ('2000-01-01', 1);
insert into sorting_key_contain_function values ('2000-01-01', 2);
select distinct datetime from sorting_key_contain_function;

View File

@ -364,6 +364,8 @@ in
inIgnoreSet
indexHint
indexOf
initcap
initcapUTF8
initialQueryID
initializeAggregation
intDiv

View File

@ -0,0 +1,13 @@
Hello
Hello
Hello World
Yeah, Well, I`M Gonna Go Build My Own Theme Park
Crc32ieee Is The Best Function
42ok
Hello
Yeah, Well, I`M Gonna Go Build My Own Theme Park
Привет, Как Дела?
Ätsch, Bätsch
We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ.

View File

@ -0,0 +1,14 @@
select initcap('');
select initcap('Hello');
select initcap('hello');
select initcap('hello world');
select initcap('yeah, well, i`m gonna go build my own theme park');
select initcap('CRC32IEEE is the best function');
select initcap('42oK');
select initcapUTF8('');
select initcapUTF8('Hello');
select initcapUTF8('yeah, well, i`m gonna go build my own theme park');
select initcapUTF8('привет, как дела?');
select initcapUTF8('ätsch, bätsch');
select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.');

View File

@ -0,0 +1,9 @@
drop table if exists test;
create table test
(
n1 UInt32,
n2 UInt32 alias murmurHash3_32(n1),
n3 UInt32 materialized n2 + 1
) engine=MergeTree order by n1;
insert into test select * from generateRandom() limit 10;
drop table test;
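A hedged note on what this test guards, tying it to the `getInsertable()` changes earlier in this commit: the structure hint passed to `generateRandom()` should now contain only the insertable column `n1`, not the alias column `n2` or the materialized column `n3`.
```sql
-- Only ordinary columns are insertable, so the structure hint asks
-- generateRandom() for a single UInt32 column (n1) rather than all three.
insert into test select * from generateRandom() limit 10;
```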

View File

@ -0,0 +1,5 @@
[1,2,3,4,5,6]
[1,2,3,4,5,6]
1 [1,2,3]
2 [4,5]
3 [6]

View File

@ -0,0 +1,9 @@
drop table if exists t;
create table t (n UInt32, a Array(Int32)) engine=Memory;
insert into t values (1, [1,2,3]), (2, [4,5]), (3, [6]);
select array_concat_agg(a) from t;
select ArrAy_cOncAt_aGg(a) from t;
select n, array_concat_agg(a) from t group by n order by n;
drop table t;

View File

@ -991,6 +991,7 @@ addressToLine
addressToLineWithInlines
addressToSymbol
adviced
agg
aggregatefunction
aggregatingmergetree
aggregatio
@ -1582,6 +1583,8 @@ indexOf
infi
initialQueryID
initializeAggregation
initcap
initcapUTF
injective
innogames
inodes