Merge branch 'master' into analyzer-integration-tests-master

2024-09-20 08:40:50 +00:00 · 2023-07-18 16:33:59 +03:00 · 2023-07-18 16:33:59 +03:00 · 7f2f73f569
commit 7f2f73f569
parent fc05187e8a 66531965f9
75 changed files with 1850 additions and 264 deletions
--- a/cmake/limit_jobs.cmake
+++ b/cmake/limit_jobs.cmake
@ -1,43 +1,38 @@
-# Usage:
-# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes
-# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
+# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive.
+#
+# Usage from CMake:
+#    set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabyte
+#    set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabyte
 #    include (cmake/limit_jobs.cmake)
+#
+# (bigger values mean fewer jobs)

-cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under freebsd
+cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY)
 cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES)

-# 1 if not set
-option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "")
+# Set these to an explicit job count to disable the automatic job-limiting
+option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF)
+option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF)

-# 1 if not set
-option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "")
-
-if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
+if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY)
    math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY})

    if (NOT PARALLEL_COMPILE_JOBS)
        set (PARALLEL_COMPILE_JOBS 1)
    endif ()
-    if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
-        set (PARALLEL_COMPILE_JOBS_LESS TRUE)
+    if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+        message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
    endif()
 endif ()

-if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
-    set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
-    string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
-    set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
-endif ()
-
-
-if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
+if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY)
    math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY})

    if (NOT PARALLEL_LINK_JOBS)
        set (PARALLEL_LINK_JOBS 1)
    endif ()
-    if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
-        set (PARALLEL_LINK_JOBS_LESS TRUE)
+    if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+        message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
    endif()
 endif ()

@ -52,20 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
    set (PARALLEL_LINK_JOBS 2)
 endif()

-if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES))
+message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
+
+if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+    set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
+    string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
+    set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
+endif ()
+
+if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
    set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
    string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
    set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS})
 endif ()
-
-if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
-    message(STATUS
-        "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory.
-        Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
-    if (PARALLEL_COMPILE_JOBS_LESS)
-        message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
-    endif()
-    if (PARALLEL_LINK_JOBS_LESS)
-        message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
-    endif()
-endif ()
--- a/contrib/cctz
+++ b/contrib/cctz
@ -1 +1 @@
-Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
+Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -4524,6 +4524,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta

 ### Placeholders

+- `%a` — Full original filename (e.g., "sample.csv").
 - `%f` — Original filename without extension (e.g., "sample").
 - `%e` — Original file extension with dot (e.g., ".csv").
 - `%t` — Timestamp (in microseconds).
--- a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md
@ -0,0 +1,32 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg
+sidebar_position: 110
+---
+
+# array_concat_agg
+- Alias of `groupArrayArray`. The function is case-insensitive.
+
+**Example**
+
+```text
+SELECT *
+FROM t
+
+┌─a───────┐
+│ [1,2,3] │
+│ [4,5]   │
+│ [6]     │
+└─────────┘
+
+```
+
+Query:
+
+```sql
+SELECT array_concat_agg(a) AS a
+FROM t
+
+┌─a─────────────┐
+│ [1,2,3,4,5,6] │
+└───────────────┘
+```
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@ -1255,3 +1255,15 @@ Result:
 │ A240             │
 └──────────────────┘
 ```
+
+## initcap
+
+Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
+
+## initcapUTF8
+
+Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
+
+If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@ -4201,6 +4201,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
 ### Шаблон
 Шаблон поддерживает следующие виды плейсхолдеров:

+- `%a` — Полное исходное имя файла (например "sample.csv").
 - `%f` — Исходное имя файла без расширения (например "sample").
 - `%e` — Оригинальное расширение файла с точкой (например ".csv").
 - `%t` — Текущее время (в микросекундах).
--- a/docs/ru/sql-reference/functions/string-functions.md
+++ b/docs/ru/sql-reference/functions/string-functions.md
@ -1113,3 +1113,14 @@ A text with tags .
 The content within <b>CDATA</b>
 Do Nothing for 2 Minutes 2:00 &nbsp;
 ```
+
+## initcap {#initcap}
+
+Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами.
+
+## initcapUTF8 {#initcapUTF8}
+
+Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8.
+Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
+Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
+Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
--- a/src/AggregateFunctions/AggregateFunctionFactory.cpp
+++ b/src/AggregateFunctions/AggregateFunctionFactory.cpp
@ -222,7 +222,6 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet(
        : nullptr;
 }

-
 std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
 {
    if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
--- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
+++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
@ -126,6 +126,7 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)

    factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
    factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::CaseInsensitive);
+    factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::CaseInsensitive);
    factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
    factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray<true>, properties });
 }
--- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp
+++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp
@ -8,7 +8,6 @@

 #include <Common/MemoryTracker.h>
 #include <Common/CurrentThread.h>
-#include <Common/Arena.h>

 #include <Interpreters/Context.h>

--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -6223,7 +6223,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
        const auto & insertion_table = scope_context->getInsertionTable();
        if (!insertion_table.empty())
        {
-            const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
+            const auto & insert_structure = DatabaseCatalog::instance()
+                                                .getTable(insertion_table, scope_context)
+                                                ->getInMemoryMetadataPtr()
+                                                ->getColumns()
+                                                .getInsertable();
            DB::ColumnsDescription structure_hint;

            bool use_columns_from_insert_query = true;
--- a/src/Common/DateLUTImpl.cpp
+++ b/src/Common/DateLUTImpl.cpp
@ -10,7 +10,6 @@
 #include <cassert>
 #include <chrono>
 #include <cstring>
-#include <iostream>
 #include <memory>


--- a/src/Common/FileRenamer.cpp
+++ b/src/Common/FileRenamer.cpp
@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const
    // Define placeholders and their corresponding values
    std::map<String, String> placeholders =
    {
+        {"%a", filename},
        {"%f", file_base},
        {"%e", file_ext},
        {"%t", timestamp},
@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const
 bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error)
 {
    // Check if the rule contains invalid placeholders
-    re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$");
+    re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$");
    if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern))
    {
        if (throw_on_error)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%");
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%");
        return false;
    }

    // Replace valid placeholders with empty strings and count remaining percentage signs.
    String replaced_rule = rule;
+    boost::replace_all(replaced_rule, "%a", "");
    boost::replace_all(replaced_rule, "%f", "");
    boost::replace_all(replaced_rule, "%e", "");
    boost::replace_all(replaced_rule, "%t", "");
--- a/src/Common/FileRenamer.h
+++ b/src/Common/FileRenamer.h
@ -9,6 +9,7 @@ namespace DB
 /**
  * The FileRenamer class provides functionality for renaming files based on given pattern with placeholders
  * The supported placeholders are:
+ *   %a - Full original filename ("sample.csv")
  *   %f - Original filename without extension ("sample")
  *   %e - Original file extension with dot (".csv")
  *   %t - Timestamp (in microseconds)
--- a/src/Common/IFactoryWithAliases.h
+++ b/src/Common/IFactoryWithAliases.h
@ -52,35 +52,38 @@ public:
    {
        const auto & creator_map = getMap();
        const auto & case_insensitive_creator_map = getCaseInsensitiveMap();
-        const String factory_name = getFactoryName();

-        String real_dict_name;
-        if (creator_map.count(real_name))
-            real_dict_name = real_name;
-        else if (auto real_name_lowercase = Poco::toLower(real_name); case_insensitive_creator_map.count(real_name_lowercase))
-            real_dict_name = real_name_lowercase;
-        else
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: can't create alias '{}', the real name '{}' is not registered",
-                            factory_name, alias_name, real_name);
+        auto real_name_lowercase = Poco::toLower(real_name);
+        if (!creator_map.contains(real_name) && !case_insensitive_creator_map.contains(real_name_lowercase))
+            throw Exception(
+                ErrorCodes::LOGICAL_ERROR,
+                "{}: can't create alias '{}', the real name '{}' is not registered",
+                getFactoryName(),
+                alias_name,
+                real_name);

+        registerAliasUnchecked(alias_name, real_name, case_sensitiveness);
+    }
+
+    /// The caller must make sure that real_name is actually registered when calling this function directly.
+    void registerAliasUnchecked(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive)
+    {
        String alias_name_lowercase = Poco::toLower(alias_name);
-
-        if (creator_map.count(alias_name) || case_insensitive_creator_map.count(alias_name_lowercase))
-            throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: the alias name '{}' is already registered as real name",
-                            factory_name, alias_name);
+        String real_name_lowercase = Poco::toLower(real_name);
+        const String factory_name = getFactoryName();

        if (case_sensitiveness == CaseInsensitive)
        {
-            if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_dict_name).second)
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique",
-                                factory_name, alias_name);
+            if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_name).second)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique", factory_name, alias_name);
            case_insensitive_name_mapping[alias_name_lowercase] = real_name;
        }

-        if (!aliases.emplace(alias_name, real_dict_name).second)
+        if (!aliases.emplace(alias_name, real_name).second)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: alias name '{}' is not unique", factory_name, alias_name);
    }

+
    std::vector<String> getAllRegisteredNames() const override
    {
        std::vector<String> result;
@ -93,7 +96,7 @@ public:
    bool isCaseInsensitive(const String & name) const
    {
        String name_lowercase = Poco::toLower(name);
-        return getCaseInsensitiveMap().count(name_lowercase) || case_insensitive_aliases.count(name_lowercase);
+        return getCaseInsensitiveMap().contains(name_lowercase) || case_insensitive_aliases.contains(name_lowercase);
    }

    const String & aliasTo(const String & name) const
@ -106,14 +109,11 @@ public:
        throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: name '{}' is not alias", getFactoryName(), name);
    }

-    bool isAlias(const String & name) const
-    {
-        return aliases.count(name) || case_insensitive_aliases.contains(name);
-    }
+    bool isAlias(const String & name) const { return aliases.contains(name) || case_insensitive_aliases.contains(name); }

    bool hasNameOrAlias(const String & name) const
    {
-        return getMap().count(name) || getCaseInsensitiveMap().count(name) || isAlias(name);
+        return getMap().contains(name) || getCaseInsensitiveMap().contains(name) || isAlias(name);
    }

    /// Return the canonical name (the name used in registration) if it's different from `name`.
@ -129,7 +129,7 @@ public:

 private:
    using InnerMap = std::unordered_map<String, Value>; // name -> creator
-    using AliasMap = std::unordered_map<String, String>; // alias -> original type
+    using AliasMap = std::unordered_map<String, String>; // alias -> original name

    virtual const InnerMap & getMap() const = 0;
    virtual const InnerMap & getCaseInsensitiveMap() const = 0;
--- a/src/Common/mysqlxx/Pool.cpp
+++ b/src/Common/mysqlxx/Pool.cpp
@ -25,8 +25,6 @@ void Pool::Entry::incrementRefCount()
    /// First reference, initialize thread
    if (data->ref_count.fetch_add(1) == 0)
        mysql_thread_init();
-
-    chassert(!data->removed_from_pool);
 }


@ -43,9 +41,12 @@ void Pool::Entry::decrementRefCount()
        /// In Pool::Entry::disconnect() we remove connection from the list of pool's connections.
        /// So now we must deallocate the memory.
        if (data->removed_from_pool)
+        {
+            data->conn.disconnect();
            ::delete data;
        }
    }
+}


 Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name,
@ -230,8 +231,6 @@ void Pool::removeConnection(Connection* connection)
    std::lock_guard lock(mutex);
    if (connection)
    {
-        if (!connection->removed_from_pool)
-            connection->conn.disconnect();
        connections.remove(connection);
        connection->removed_from_pool = true;
    }
@ -240,6 +239,7 @@ void Pool::removeConnection(Connection* connection)

 void Pool::Entry::disconnect()
 {
+    // Remove the Entry from the Pool. Actual disconnection is delayed until refcount == 0.
    pool->removeConnection(data);
 }

--- a/src/Compression/fuzzers/delta_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/delta_decompress_fuzzer.cpp
@ -34,7 +34,7 @@ try
    DB::Memory<> memory;
    memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());

-    codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
+    codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);

    return 0;
 }
--- a/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/double_delta_decompress_fuzzer.cpp
@ -34,7 +34,7 @@ try
    DB::Memory<> memory;
    memory.resize(output_buffer_size + codec->getAdditionalSizeAtTheEndOfBuffer());

-    codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
+    codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);

    return 0;
 }
--- a/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/encrypted_decompress_fuzzer.cpp
@ -292,10 +292,10 @@ try

    DB::Memory<> memory;
    memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
-    codec_128->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
+    codec_128->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);

    memory.resize(input.size() + codec_128->getAdditionalSizeAtTheEndOfBuffer());
-    codec_256->doDecompressData(input.data(), static_cast<UInt32>(input.size()), memory.data(), static_cast<UInt32>(input.size() - 31));
+    codec_256->doDecompressData(input.data(), input.size(), memory.data(), input.size() - 31);
    return 0;
 }
 catch (...)
--- a/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp
+++ b/src/Compression/fuzzers/lz4_decompress_fuzzer.cpp
@ -24,7 +24,7 @@ try
        return 0;

    const auto * p = reinterpret_cast<const AuxiliaryRandomData *>(data);
-    auto codec = DB::getCompressionCodecLZ4(static_cast<int>(p->level));
+    auto codec = DB::getCompressionCodecLZ4(p->level);

    size_t output_buffer_size = p->decompressed_size % 65536;
    size -= sizeof(AuxiliaryRandomData);
@ -37,7 +37,7 @@ try
    DB::Memory<> memory;
    memory.resize(output_buffer_size + LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER);

-    codec->doDecompressData(reinterpret_cast<const char *>(data), static_cast<UInt32>(size), memory.data(), static_cast<UInt32>(output_buffer_size));
+    codec->doDecompressData(reinterpret_cast<const char *>(data), size, memory.data(), output_buffer_size);

    return 0;
 }
--- a/src/Core/Field.h
+++ b/src/Core/Field.h
@ -28,7 +28,6 @@ namespace ErrorCodes
    extern const int NOT_IMPLEMENTED;
    extern const int LOGICAL_ERROR;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    extern const int TOO_DEEP_RECURSION;
 }

 constexpr Null NEGATIVE_INFINITY{Null::Value::NegativeInfinity};
@ -42,13 +41,10 @@ using FieldVector = std::vector<Field, AllocatorWithMemoryTracking<Field>>;
 /// construct a Field of Array or a Tuple type. An alternative approach would be
 /// to construct both of these types from FieldVector, and have the caller
 /// specify the desired Field type explicitly.
-/// As the result stack overflow on destruction is possible
-/// and to avoid it we need to count the depth and have a threshold.
 #define DEFINE_FIELD_VECTOR(X) \
 struct X : public FieldVector \
 { \
    using FieldVector::FieldVector; \
-    uint8_t nested_field_depth = 0; \
 }

 DEFINE_FIELD_VECTOR(Array);
@ -65,7 +61,6 @@ using FieldMap = std::map<String, Field, std::less<>, AllocatorWithMemoryTrackin
 struct X : public FieldMap \
 { \
    using FieldMap::FieldMap; \
-    uint8_t nested_field_depth = 0; \
 }

 DEFINE_FIELD_MAP(Object);
@ -296,12 +291,6 @@ decltype(auto) castToNearestFieldType(T && x)
  */
 #define DBMS_MIN_FIELD_SIZE 32

-/// Note: uint8_t is used for storing depth value.
-#if defined(SANITIZER) || !defined(NDEBUG)
-    #define DBMS_MAX_NESTED_FIELD_DEPTH 64
-#else
-    #define DBMS_MAX_NESTED_FIELD_DEPTH 255
-#endif

 /** Discriminated union of several types.
  * Made for replacement of `boost::variant`
@ -682,49 +671,6 @@ private:

    Types::Which which;

-    /// StorageType and Original are the same for Array, Tuple, Map, Object
-    template <typename StorageType, typename Original>
-    uint8_t calculateAndCheckFieldDepth(Original && x)
-    {
-        uint8_t result = 0;
-
-        if constexpr (std::is_same_v<StorageType, Array>
-            || std::is_same_v<StorageType, Tuple>
-            || std::is_same_v<StorageType, Map>
-            || std::is_same_v<StorageType, Object>)
-        {
-            result = x.nested_field_depth;
-
-            auto get_depth = [](const Field & elem)
-            {
-                switch (elem.which)
-                {
-                    case Types::Array:
-                        return elem.template get<Array>().nested_field_depth;
-                    case Types::Tuple:
-                        return elem.template get<Tuple>().nested_field_depth;
-                    case Types::Map:
-                        return elem.template get<Map>().nested_field_depth;
-                    case Types::Object:
-                        return elem.template get<Object>().nested_field_depth;
-                    default:
-                        return static_cast<uint8_t>(0);
-                }
-            };
-
-            if constexpr (std::is_same_v<StorageType, Object>)
-                for (auto & [_, value] : x)
-                    result = std::max(get_depth(value), result);
-            else
-                for (auto & value : x)
-                    result = std::max(get_depth(value), result);
-        }
-
-        if (result >= DBMS_MAX_NESTED_FIELD_DEPTH)
-            throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Too deep Field");
-
-        return result;
-    }

    /// Assuming there was no allocated state or it was deallocated (see destroy).
    template <typename T>
@ -738,17 +684,7 @@ private:
        // we must initialize the entire wide stored type, and not just the
        // nominal type.
        using StorageType = NearestFieldType<UnqualifiedType>;
-
-        /// Incrementing the depth since we create a new Field.
-        auto depth = calculateAndCheckFieldDepth<StorageType>(x);
        new (&storage) StorageType(std::forward<T>(x));
-
-        if constexpr (std::is_same_v<StorageType, Array>
-            || std::is_same_v<StorageType, Tuple>
-            || std::is_same_v<StorageType, Map>
-            || std::is_same_v<StorageType, Object>)
-            reinterpret_cast<StorageType *>(&storage)->nested_field_depth = depth + 1;
-
        which = TypeToEnum<UnqualifiedType>::value;
    }

@ -845,7 +781,7 @@ private:
    }

    template <typename T>
-    ALWAYS_INLINE void destroy()
+    void destroy()
    {
        T * MAY_ALIAS ptr = reinterpret_cast<T*>(&storage);
        ptr->~T();
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -577,6 +577,7 @@ class IColumn;
    M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
    M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
    M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \
+    M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \
    M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
    M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
    M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
@ -736,7 +737,7 @@ class IColumn;
    M(String, workload, "default", "Name of workload to be used to access resources", 0) \
    M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
    \
-    M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
+    M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
    \
    M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
    M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
@ -774,6 +775,7 @@ class IColumn;
    M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
    M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
    M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
+    M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
    // End of COMMON_SETTINGS
    // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.

--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
+    {"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}},
    {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
              {"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
    {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
--- a/src/DataTypes/DataTypeFactory.cpp
+++ b/src/DataTypes/DataTypeFactory.cpp
@ -62,7 +62,7 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const
    }
    else
    {
-        ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth);
+        ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth);
    }

    return getImpl<nullptr_on_error>(ast);
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@ -1521,10 +1521,8 @@ struct Transformer
                if constexpr (std::is_same_v<Additions, DateTimeAccurateConvertStrategyAdditions>
                    || std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
                {
-#   pragma clang diagnostic push
-#   pragma clang diagnostic ignored "-Wimplicit-const-int-float-conversion"
                    bool is_valid_input = vec_from[i] >= 0 && vec_from[i] <= 0xFFFFFFFFL;
-#   pragma clang diagnostic pop
+
                    if (!is_valid_input)
                    {
                        if constexpr (std::is_same_v<Additions, DateTimeAccurateOrNullConvertStrategyAdditions>)
--- a/src/Functions/LowerUpperUTF8Impl.h
+++ b/src/Functions/LowerUpperUTF8Impl.h
@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl
        }
        else
        {
-            static const Poco::UTF8Encoding utf8;
-
            size_t src_sequence_length = UTF8::seqLength(*src);
            /// In case partial buffer was passed (due to SSE optimization)
            /// we cannot convert it with current src_end, but we may have more
--- a/src/Functions/initcap.cpp
+++ b/src/Functions/initcap.cpp
@ -0,0 +1,66 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionStringToString.h>
+#include <Common/StringUtils/StringUtils.h>
+
+namespace DB
+{
+namespace
+{
+
+/// ASCII initcap: upper-cases the first letter of every alphanumeric run and lower-cases the remaining letters.
+struct InitcapImpl
+{
+    /// String column: the offsets are copied as is and the whole character buffer is recased in one pass.
+    static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
+    {
+        if (data.empty())
+            return;
+        const size_t total_bytes = data.size();
+        res_data.resize(total_bytes);
+        res_offsets.assign(offsets);
+        array(data.data(), data.data() + total_bytes, res_data.data());
+    }
+
+    /// FixedString column: the whole buffer is recased in one pass, the row width argument is not used.
+    static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
+    {
+        const size_t total_bytes = data.size();
+        res_data.resize(total_bytes);
+        array(data.data(), data.data() + total_bytes, res_data.data());
+    }
+
+private:
+    /// Writes the recased copy of [src, src_end) to dst, one output byte per input byte.
+    static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
+    {
+        bool inside_word = false;
+        for (; src < src_end; ++src, ++dst)
+        {
+            const char ch = *src;
+            const bool word_char = isAlphaNumericASCII(ch);
+            if (!isAlphaASCII(ch))
+                *dst = ch; /// Non-letter bytes are copied unchanged.
+            else if (word_char && !inside_word)
+                *dst = toUpperIfAlphaASCII(ch);
+            else
+                *dst = toLowerIfAlphaASCII(ch);
+            inside_word = word_char;
+        }
+    }
+};
+
+struct NameInitcap /// Name holder passed as the second template argument of FunctionStringToString below.
+{
+    static constexpr auto name = "initcap";
+};
+using FunctionInitcap = FunctionStringToString<InitcapImpl, NameInitcap>;
+
+}
+
+REGISTER_FUNCTION(Initcap)
+{
+    factory.registerFunction<FunctionInitcap>({}, FunctionFactory::CaseInsensitive); /// Registered with the case-insensitive flag.
+}
+
+}
--- a/src/Functions/initcapUTF8.cpp
+++ b/src/Functions/initcapUTF8.cpp
@ -0,0 +1,114 @@
+#include <DataTypes/DataTypeString.h>
+#include <Functions/FunctionStringToString.h>
+#include <Functions/LowerUpperUTF8Impl.h>
+#include <Functions/FunctionFactory.h>
+#include <Poco/Unicode.h>
+
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+namespace
+{
+
+struct InitcapUTF8Impl /// UTF-8 aware initcap: recases code points instead of single bytes.
+{
+    static void vector(
+        const ColumnString::Chars & data,
+        const ColumnString::Offsets & offsets,
+        ColumnString::Chars & res_data,
+        ColumnString::Offsets & res_offsets)
+    {
+        if (data.empty())
+            return;
+        res_data.resize(data.size());
+        res_offsets.assign(offsets);
+        array(data.data(), data.data() + data.size(), offsets, res_data.data());
+    }
+    /// FixedString arguments are not supported: always throws BAD_ARGUMENTS.
+    [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
+    }
+    /// Recases the code point at src into dst, advances both pointers and updates prev_alphanum (true while inside an alphanumeric run).
+    static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum)
+    {
+        size_t src_sequence_length = UTF8::seqLength(*src);
+        auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
+
+        if (src_code_point)
+        {
+            bool alpha = Poco::Unicode::isAlpha(*src_code_point);
+            bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point);
+
+            int dst_code_point = *src_code_point;
+            if (alphanum && !prev_alphanum)
+            {
+                if (alpha)
+                    dst_code_point = Poco::Unicode::toUpper(*src_code_point);
+            }
+            else if (alpha)
+            {
+                dst_code_point = Poco::Unicode::toLower(*src_code_point);
+            }
+            prev_alphanum = alphanum;
+            if (dst_code_point > 0)
+            {
+                size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
+                assert(dst_sequence_length <= 4);
+                /// The converted code point is kept only when it occupies as many bytes as the source sequence.
+                if (dst_sequence_length == src_sequence_length)
+                {
+                    src += dst_sequence_length;
+                    dst += dst_sequence_length;
+                    return;
+                }
+            }
+        }
+        /// Fallback (decoding failed, non-positive converted code point, or encoded length differs): copy one byte and reset the word state.
+        *dst = *src;
+        ++dst;
+        ++src;
+        prev_alphanum = false;
+    }
+
+private:
+    /// Recases [src, src_end) into dst; each value in offsets is used as a row end relative to the start of the buffer.
+    static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
+    {
+        const auto * offset_it = offsets.begin();
+        const UInt8 * begin = src;
+
+        /// Process the data row by row (to avoid influence of bad UTF8 symbols from one row on another); every row starts outside of a word.
+        while (src < src_end)
+        {
+            const UInt8 * row_end = begin + *offset_it;
+            chassert(row_end >= src);
+            bool prev_alphanum = false;
+            while (src < row_end)
+                processCodePoint(src, row_end, dst, prev_alphanum);
+            ++offset_it;
+        }
+    }
+};
+
+struct NameInitcapUTF8 /// Name holder passed as the second template argument of FunctionStringToString below.
+{
+    static constexpr auto name = "initcapUTF8";
+};
+
+using FunctionInitcapUTF8 = FunctionStringToString<InitcapUTF8Impl, NameInitcapUTF8>;
+
+}
+
+REGISTER_FUNCTION(InitcapUTF8)
+{
+    factory.registerFunction<FunctionInitcapUTF8>(); /// Registered without any extra arguments.
+}
+
+}
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -1524,7 +1524,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
        uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
        if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
        {
-            const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
+            const auto & insert_structure = DatabaseCatalog::instance()
+                                                .getTable(getInsertionTable(), shared_from_this())
+                                                ->getInMemoryMetadataPtr()
+                                                ->getColumns()
+                                                .getInsertable();
            DB::ColumnsDescription structure_hint;

            bool use_columns_from_insert_query = true;
--- a/src/Interpreters/InterpreterCreateIndexQuery.cpp
+++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp
@ -15,6 +15,7 @@ namespace DB
 namespace ErrorCodes
 {
    extern const int TABLE_IS_READ_ONLY;
+    extern const int INCORRECT_QUERY;
 }


@ -23,6 +24,21 @@ BlockIO InterpreterCreateIndexQuery::execute()
    auto current_context = getContext();
    const auto & create_index = query_ptr->as<ASTCreateIndexQuery &>();

+    // Noop if allow_create_index_without_type = true. throw otherwise
+    if (!create_index.index_decl->as<ASTIndexDeclaration>()->type)
+    {
+        if (!current_context->getSettingsRef().allow_create_index_without_type)
+        {
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "CREATE INDEX without TYPE is forbidden."
+                " SET allow_create_index_without_type=1 to ignore this statements.");
+        }
+        else
+        {
+            // Nothing to do
+            return {};
+        }
+    }
+
    AccessRightsElements required_access;
    required_access.emplace_back(AccessType::ALTER_ADD_INDEX, create_index.getDatabase(), create_index.getTable());

--- a/src/Parsers/ASTCreateIndexQuery.cpp
+++ b/src/Parsers/ASTCreateIndexQuery.cpp
@ -56,7 +56,6 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma

    formatOnCluster(settings);

-    if (!cluster.empty())
    settings.ostr << " ";

    index_decl->formatImpl(settings, state, frame);
--- a/src/Parsers/ASTIndexDeclaration.cpp
+++ b/src/Parsers/ASTIndexDeclaration.cpp
@ -13,8 +13,8 @@ ASTPtr ASTIndexDeclaration::clone() const
    auto res = std::make_shared<ASTIndexDeclaration>();

    res->name = name;
+    if (granularity)
        res->granularity = granularity;
-
    if (expr)
        res->set(res->expr, expr->clone());
    if (type)
@ -24,25 +24,39 @@ ASTPtr ASTIndexDeclaration::clone() const


 void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
+{
+    if (expr)
    {
        if (part_of_create_index_query)
+        {
+            if (expr->as<ASTExpressionList>())
            {
                s.ostr << "(";
                expr->formatImpl(s, state, frame);
                s.ostr << ")";
            }
            else
+            expr->formatImpl(s, state, frame);
+        }
+        else
        {
            s.ostr << backQuoteIfNeed(name);
            s.ostr << " ";
            expr->formatImpl(s, state, frame);
        }
+    }

+    if (type)
+    {
        s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : "");
        type->formatImpl(s, state, frame);
+    }
+    if (granularity)
+    {
        s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? hilite_none : "");
        s.ostr << granularity;
    }
+}

 }

--- a/src/Parsers/ParserCreateIndexQuery.cpp
+++ b/src/Parsers/ParserCreateIndexQuery.cpp
@ -17,24 +17,36 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
 {
    ParserKeyword s_type("TYPE");
    ParserKeyword s_granularity("GRANULARITY");
-
+    ParserToken open(TokenType::OpeningRoundBracket);
+    ParserToken close(TokenType::ClosingRoundBracket);
+    ParserOrderByExpressionList order_list;
    ParserDataType data_type_p;
    ParserExpression expression_p;
    ParserUnsignedInteger granularity_p;

    ASTPtr expr;
+    ASTPtr order;
    ASTPtr type;
    ASTPtr granularity;

    /// Skip name parser for SQL-standard CREATE INDEX
-    if (!expression_p.parse(pos, expr, expected))
+    if (expression_p.parse(pos, expr, expected))
+    {
+    }
+    else if (open.ignore(pos, expected))
+    {
+        if (!order_list.parse(pos, order, expected))
            return false;

-    if (!s_type.ignore(pos, expected))
+        if (!close.ignore(pos, expected))
            return false;
+    }

+    if (s_type.ignore(pos, expected))
+    {
        if (!data_type_p.parse(pos, type, expected))
            return false;
+    }

    if (s_granularity.ignore(pos, expected))
    {
@ -45,13 +57,14 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
    auto index = std::make_shared<ASTIndexDeclaration>();
    index->part_of_create_index_query = true;
    index->set(index->expr, expr);
+    if (type)
        index->set(index->type, type);

    if (granularity)
        index->granularity = granularity->as<ASTLiteral &>().value.safeGet<UInt64>();
    else
    {
-        if (index->type->name == "annoy")
+        if (index->type && index->type->name == "annoy")
            index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY;
        else
            index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY;
--- a/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt
+++ b/src/Parsers/fuzzers/codegen_fuzzer/CMakeLists.txt
@ -42,4 +42,4 @@ clickhouse_add_executable(codegen_select_fuzzer ${FUZZER_SRCS})
 set_source_files_properties("${PROTO_SRCS}" "out.cpp" PROPERTIES COMPILE_FLAGS "-Wno-reserved-identifier")

 target_include_directories(codegen_select_fuzzer SYSTEM BEFORE PRIVATE "${CMAKE_CURRENT_BINARY_DIR}")
-target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
+target_link_libraries(codegen_select_fuzzer PRIVATE ch_contrib::protobuf_mutator ch_contrib::protoc dbms ${LIB_FUZZING_ENGINE})
--- a/src/Processors/QueryPlan/Optimizations/Optimizations.h
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@ -111,7 +111,7 @@ void optimizePrimaryKeyCondition(const Stack & stack);
 void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes);
 void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
 void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &);
-bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
+bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections);
 bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes);
 bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes);

--- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp
+++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp
@ -19,6 +19,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const
    settings.remove_redundant_distinct = from.query_plan_remove_redundant_distinct;
    settings.optimize_projection = from.optimize_use_projections && from.query_plan_optimize_projection;
    settings.force_use_projection = settings.optimize_projection && from.force_optimize_projection;
+    settings.optimize_use_implicit_projections = settings.optimize_projection && from.optimize_use_implicit_projections;
    return settings;
 }

--- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h
+++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h
@ -41,6 +41,7 @@ struct QueryPlanOptimizationSettings
    /// If reading from projection can be applied
    bool optimize_projection = false;
    bool force_use_projection = false;
+    bool optimize_use_implicit_projections = false;

    static QueryPlanOptimizationSettings fromSettings(const Settings & from);
    static QueryPlanOptimizationSettings fromContext(ContextPtr from);
--- a/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
+++ b/src/Processors/QueryPlan/Optimizations/liftUpFunctions.cpp
@ -4,6 +4,7 @@
 #include <Processors/QueryPlan/Optimizations/Optimizations.h>
 #include <Processors/QueryPlan/SortingStep.h>
 #include <Common/Exception.h>
+#include <DataTypes/IDataType.h>

 namespace DB
 {
@ -28,6 +29,20 @@ const DB::DataStream & getChildOutputStream(DB::QueryPlan::Node & node)
 namespace DB::QueryPlanOptimizations
 {

+/// Checks that all output columns have distinct names.
+/// Duplicate names are ok for a DAG, but may introduce a bug in SortingStep because columns are selected by name.
+static bool areOutputsConvertableToBlock(const ActionsDAG::NodeRawConstPtrs & outputs)
+{
+    std::unordered_set<std::string_view> seen_names;
+    for (const auto * output_node : outputs)
+    {
+        const bool is_new_name = seen_names.emplace(output_node->result_name).second;
+        if (!is_new_name)
+            return false;
+    }
+    return true;
+}
+
 size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes)
 {
    if (parent_node->children.size() != 1)
@ -57,6 +72,9 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
    if (unneeded_for_sorting->trivial())
        return 0;

+    if (!areOutputsConvertableToBlock(needed_for_sorting->getOutputs()))
+        return 0;
+
    // Sorting (parent_node) -> Expression (child_node)
    auto & node_with_needed = nodes.emplace_back();
    std::swap(node_with_needed.children, child_node->children);
--- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@ -126,7 +126,8 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
                    optimizeReadInOrder(*frame.node, nodes);

                if (optimization_settings.optimize_projection)
-                    num_applied_projection += optimizeUseAggregateProjections(*frame.node, nodes);
+                    num_applied_projection
+                        += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections);

                if (optimization_settings.aggregation_in_order)
                    optimizeAggregationInOrder(*frame.node, nodes);
--- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp
@ -433,7 +433,8 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
    QueryPlan::Node & node,
    AggregatingStep & aggregating,
    ReadFromMergeTree & reading,
-    const std::shared_ptr<PartitionIdToMaxBlock> & max_added_blocks)
+    const std::shared_ptr<PartitionIdToMaxBlock> & max_added_blocks,
+    bool allow_implicit_projections)
 {
    const auto & keys = aggregating.getParams().keys;
    const auto & aggregates = aggregating.getParams().aggregates;
@ -453,7 +454,8 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
        if (projection.type == ProjectionDescription::Type::Aggregate)
            agg_projections.push_back(&projection);

-    bool can_use_minmax_projection = metadata->minmax_count_projection && !reading.getMergeTreeData().has_lightweight_delete_parts.load();
+    bool can_use_minmax_projection = allow_implicit_projections && metadata->minmax_count_projection
+        && !reading.getMergeTreeData().has_lightweight_delete_parts.load();

    if (!can_use_minmax_projection && agg_projections.empty())
        return candidates;
@ -543,7 +545,7 @@ static QueryPlan::Node * findReadingStep(QueryPlan::Node & node)
    return nullptr;
 }

-bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
+bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections)
 {
    if (node.children.size() != 1)
        return false;
@ -568,7 +570,7 @@ bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes &

    std::shared_ptr<PartitionIdToMaxBlock> max_added_blocks = getMaxAddedBlocks(reading);

-    auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks);
+    auto candidates = getAggregateProjectionCandidates(node, *aggregating, *reading, max_added_blocks, allow_implicit_projections);

    AggregateProjectionCandidate * best_candidate = nullptr;
    if (candidates.minmax_projection)
--- a/src/Storages/IndicesDescription.cpp
+++ b/src/Storages/IndicesDescription.cpp
@ -11,6 +11,7 @@
 #include <Storages/extractKeyExpressionList.h>

 #include <Core/Defines.h>
+#include "Common/Exception.h"


 namespace DB
@ -89,8 +90,16 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast
    result.type = Poco::toLower(index_definition->type->name);
    result.granularity = index_definition->granularity;

-    ASTPtr expr_list = extractKeyExpressionList(index_definition->expr->clone());
+    ASTPtr expr_list;
+    if (index_definition->expr)
+    {
+        expr_list = extractKeyExpressionList(index_definition->expr->clone());
        result.expression_list_ast = expr_list->clone();
+    }
+    else
+    {
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expression is not set");
+    }

    auto syntax = TreeRewriter(context).analyze(expr_list, columns.getAllPhysical());
    result.expression = ExpressionAnalyzer(expr_list, syntax, context).getActions(true);
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@ -564,7 +564,17 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
        }
        case (ActionsDAG::ActionType::COLUMN):
        {
-            res = &inverted_dag.addColumn({node.column, node.result_type, node.result_name});
+            String name;
+            if (const auto * column_const = typeid_cast<const ColumnConst *>(node.column.get()))
+                /// Re-generate column name for constant.
+                /// DAG from query (with enabled analyzer) uses suffixes for constants, like 1_UInt8.
+                /// DAG from PK does not use them. This sometimes breaks matching by column name.
+                /// Ideally, we should not compare names, but DAG subtrees instead.
+                name = ASTLiteral(column_const->getDataColumn()[0]).getColumnName();
+            else
+                name = node.result_name;
+
+            res = &inverted_dag.addColumn({node.column, node.result_type, name});
            break;
        }
        case (ActionsDAG::ActionType::ALIAS):
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@ -6994,7 +6994,8 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg

    ProjectionCandidate * selected_candidate = nullptr;
    size_t min_sum_marks = std::numeric_limits<size_t>::max();
-    if (metadata_snapshot->minmax_count_projection && !has_lightweight_delete_parts.load(std::memory_order_relaxed)) /// Disable ReadFromStorage for parts with lightweight.
+    if (settings.optimize_use_implicit_projections && metadata_snapshot->minmax_count_projection
+        && !has_lightweight_delete_parts.load(std::memory_order_relaxed)) /// Disable ReadFromStorage for parts with lightweight.
        add_projection_candidate(*metadata_snapshot->minmax_count_projection, true);
    std::optional<ProjectionCandidate> minmax_count_projection_candidate;
    if (!candidates.empty())
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@ -2448,10 +2448,13 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry)
            if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex())
                throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} is suddenly changed", part_desc->src_table_part->name);

-            bool zero_copy_enabled = dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
+            /// Don't do hardlinks in case of zero-copy at any side (defensive programming)
+            bool source_zero_copy_enabled = dynamic_cast<const MergeTreeData *>(source_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
+            bool our_zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication;
+
            IDataPartStorage::ClonePartParams clone_params
            {
-                .copy_instead_of_hardlink = zero_copy_enabled && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(),
+                .copy_instead_of_hardlink = (our_zero_copy_enabled || source_zero_copy_enabled) && part_desc->src_table_part->isStoredOnRemoteDiskWithZeroCopySupport(),
                .metadata_version_to_write = metadata_snapshot->getMetadataVersion()
            };
            auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk(
@ -7585,8 +7588,10 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
            UInt64 index = lock->getNumber();
            MergeTreePartInfo dst_part_info(partition_id, index, index, src_part->info.level);

+            /// Don't do hardlinks in case of zero-copy at any side (defensive programming)
            bool zero_copy_enabled = storage_settings_ptr->allow_remote_fs_zero_copy_replication
                || dynamic_cast<const MergeTreeData *>(dest_table.get())->getSettings()->allow_remote_fs_zero_copy_replication;
+
            IDataPartStorage::ClonePartParams clone_params
            {
                .copy_instead_of_hardlink = zero_copy_enabled && src_part->isStoredOnRemoteDiskWithZeroCopySupport(),
--- a/tests/integration/test_multiple_disks/test.py
+++ b/tests/integration/test_multiple_disks/test.py
@ -818,9 +818,10 @@ def test_start_stop_moves(start_cluster, name, engine):
        node1.query(f"SYSTEM STOP MOVES {name}")
        node1.query(f"SYSTEM STOP MERGES {name}")

+        first_part = None
        for i in range(5):
            data = []  # 5MB in total
-            for i in range(5):
+            for _ in range(5):
                data.append(get_random_string(1024 * 1024))  # 1MB row
            # jbod size is 40MB, so lets insert 5MB batch 7 times
            node1.query_with_retry(
@ -829,8 +830,14 @@ def test_start_stop_moves(start_cluster, name, engine):
                )
            )

+            # we cannot rely simply on modification time of part because it can be changed
+            # by different background operations so we explicitly check after the first
+            # part is inserted
+            if i == 0:
                first_part = get_oldest_part(node1, name)

+        assert first_part is not None
+
        used_disks = get_used_disks_for_table(node1, name)

        retry = 5
--- a/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql
+++ b/tests/queries/0_stateless/01505_trivial_count_with_partition_predicate.sql
@ -6,6 +6,7 @@ create table test1(p DateTime, k int) engine MergeTree partition by toDate(p) or
 insert into test1 values ('2020-09-01 00:01:02', 1), ('2020-09-01 20:01:03', 2), ('2020-09-02 00:01:03', 3);

 set max_rows_to_read = 1;
+set optimize_use_implicit_projections = 1;
 -- non-optimized
 select count() from test1 settings max_parallel_replicas = 3;
 -- optimized (toYear is monotonic and we provide the partition expr as is)
--- a/tests/queries/0_stateless/01710_minmax_count_projection.sql
+++ b/tests/queries/0_stateless/01710_minmax_count_projection.sql
@ -4,7 +4,7 @@ create table d (i int, j int) engine MergeTree partition by i % 2 order by tuple

 insert into d select number, number from numbers(10000);

-set max_rows_to_read = 2, optimize_use_projections = 1;
+set max_rows_to_read = 2, optimize_use_projections = 1, optimize_use_implicit_projections = 1;

 select min(i), max(i), count() from d;
 select min(i), max(i), count() from d group by _partition_id order by _partition_id;
--- a/tests/queries/0_stateless/01739_index_hint.reference
+++ b/tests/queries/0_stateless/01739_index_hint.reference
@ -30,6 +30,6 @@ SELECT sum(t) FROM XXXX WHERE indexHint(t = 42);
 drop table if exists XXXX;
 create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings index_granularity=8192;
 insert into XXXX select number*60, 0 from numbers(100000);
-SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0));
+SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1;
 100000
 drop table XXXX;
--- a/tests/queries/0_stateless/01739_index_hint.sql
+++ b/tests/queries/0_stateless/01739_index_hint.sql
@ -30,6 +30,6 @@ create table XXXX (t Int64, f Float64) Engine=MergeTree order by t settings inde

 insert into XXXX select number*60, 0 from numbers(100000);

-SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0));
+SELECT count() FROM XXXX WHERE indexHint(t = toDateTime(0)) SETTINGS optimize_use_implicit_projections = 1;

 drop table XXXX;
--- a/tests/queries/0_stateless/01848_partition_value_column.sql
+++ b/tests/queries/0_stateless/01848_partition_value_column.sql
@ -5,6 +5,8 @@ create table tbl(dt DateTime, i int, j String, v Float64) engine MergeTree parti

 insert into tbl values ('2021-04-01 00:01:02', 1, '123', 4), ('2021-04-01 01:01:02', 1, '12', 4), ('2021-04-01 02:11:02', 2, '345', 4), ('2021-04-01 04:31:02', 2, '2', 4), ('2021-04-02 00:01:02', 1, '1234', 4), ('2021-04-02 00:01:02', 2, '123', 4), ('2021-04-02 00:01:02', 3, '12', 4), ('2021-04-02 00:01:02', 4, '1', 4);

+set optimize_use_implicit_projections = 1;
+
 select count() from tbl where _partition_value = ('2021-04-01', 1, 2) settings max_rows_to_read = 1;
 select count() from tbl where _partition_value.1 = '2021-04-01' settings max_rows_to_read = 4;
 select count() from tbl where _partition_value.2 = 0 settings max_rows_to_read = 4;
--- a/tests/queries/0_stateless/02273_full_sort_join.sql.j2
+++ b/tests/queries/0_stateless/02273_full_sort_join.sql.j2
@ -1,4 +1,6 @@
-- Tags: long
+-- Tags: long, no-upgrade-check
+
+-- TODO(@vdimir): remove no-upgrade-check tag after https://github.com/ClickHouse/ClickHouse/pull/51737 is released

 DROP TABLE IF EXISTS t1;
 DROP TABLE IF EXISTS t2;
--- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql
+++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql
@ -18,7 +18,7 @@ select distinct a from distinct_in_order settings max_block_size=10, max_threads

 select '-- create table with not only primary key columns';
 drop table if exists distinct_in_order sync;
-create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b);
+create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into distinct_in_order select number % number, number % 5, number % 10 from numbers(1,1000000);

 select '-- distinct with primary key prefix only';
@ -59,16 +59,16 @@ drop table if exists distinct_in_order sync;

 select '-- check that distinct in order returns the same result as ordinary distinct';
 drop table if exists distinct_cardinality_low sync;
-CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium);
+CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number % 1e3 FROM numbers_mt(1e4);

 drop table if exists distinct_in_order sync;
 drop table if exists ordinary_distinct sync;

 select '-- check that distinct in order WITH order by returns the same result as ordinary distinct';
-create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
+create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1;
-create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
+create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0;
 select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);

@ -76,9 +76,9 @@ drop table if exists distinct_in_order sync;
 drop table if exists ordinary_distinct sync;

 select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct';
-create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
+create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1;
-create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
+create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0;
 select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);

@ -86,9 +86,9 @@ drop table if exists distinct_in_order;
 drop table if exists ordinary_distinct;

 select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct';
-create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
+create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1;
-create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
+create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0;
 select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);

@ -102,12 +102,12 @@ drop table if exists sorting_key_contain_function;

 select '-- bug 42185, distinct in order and empty sort description';
 select '-- distinct in order, sorting key tuple()';
-create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple();
+create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10);
 select distinct a from sorting_key_empty_tuple;

 select '-- distinct in order, sorting key contains function';
-create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime));
+create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
 insert into sorting_key_contain_function values ('2000-01-01', 1);
 insert into sorting_key_contain_function values ('2000-01-01', 2);
 select distinct datetime from sorting_key_contain_function;
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
@ -364,6 +364,8 @@ in
 inIgnoreSet
 indexHint
 indexOf
+initcap
+initcapUTF8
 initialQueryID
 initializeAggregation
 intDiv
--- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference
+++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.reference
@ -17,7 +17,7 @@ INSERT and READ INSERT
 DROP
 CHECK with query_log
 QueryFinish	INSERT INTO times SELECT now() + INTERVAL 1 day SETTINGS optimize_on_insert = 0;	FileOpen	8
-QueryFinish	SELECT \'1\', min(t) FROM times;	FileOpen	0
+QueryFinish	SELECT \'1\', min(t) FROM times SETTINGS optimize_use_implicit_projections = 1;	FileOpen	0
 QueryFinish	INSERT INTO times SELECT now() + INTERVAL 2 day SETTINGS optimize_on_insert = 0;	FileOpen	8
-QueryFinish	SELECT \'2\', min(t) FROM times;	FileOpen	0
+QueryFinish	SELECT \'2\', min(t) FROM times SETTINGS optimize_use_implicit_projections = 1;	FileOpen	0
 QueryFinish	INSERT INTO times SELECT now() + INTERVAL 3 day SETTINGS optimize_on_insert = 0;	FileOpen	8
--- a/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh
+++ b/tests/queries/0_stateless/02675_profile_events_from_query_log_and_client.sh
@ -44,13 +44,13 @@ INSERT INTO times SELECT now() + INTERVAL 1 day SETTINGS optimize_on_insert = 0;

 echo "READ"
 $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1  -nq "
-SELECT '1', min(t) FROM times;
+SELECT '1', min(t) FROM times SETTINGS optimize_use_implicit_projections = 1;
 " 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ '

 echo "INSERT and READ INSERT"
 $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1  -nq "
 INSERT INTO times SELECT now() + INTERVAL 2 day SETTINGS optimize_on_insert = 0;
-SELECT '2', min(t) FROM times;
+SELECT '2', min(t) FROM times SETTINGS optimize_use_implicit_projections = 1;
 INSERT INTO times SELECT now() + INTERVAL 3 day SETTINGS optimize_on_insert = 0;
 " 2>&1 | grep -o -e '\ \[\ .*\ \]\ FileOpen:\ .*\ '

--- a/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.sql
+++ b/tests/queries/0_stateless/02680_illegal_type_of_filter_projection.sql
@ -1,3 +1,3 @@
 CREATE TABLE test_tuple (`p` DateTime, `i` int, `j` int) ENGINE = MergeTree PARTITION BY (toDate(p), i) ORDER BY j SETTINGS index_granularity = 1;
 insert into test_tuple values (1, 1, 1);
-SELECT count() FROM test_tuple PREWHERE sipHash64(sipHash64(p, toString(toDate(p))), toString(toDate(p))) % -0. WHERE i > NULL settings optimize_trivial_count_query=0; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
+SELECT count() FROM test_tuple PREWHERE sipHash64(sipHash64(p, toString(toDate(p))), toString(toDate(p))) % -0. WHERE i > NULL settings optimize_trivial_count_query=0, optimize_use_implicit_projections=1; -- { serverError ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER }
--- a/tests/queries/0_stateless/02725_memory-for-merges.sql
+++ b/tests/queries/0_stateless/02725_memory-for-merges.sql
@ -22,6 +22,6 @@ OPTIMIZE TABLE 02725_memory_for_merges FINAL;
 SYSTEM FLUSH LOGS;

 WITH (SELECT uuid FROM system.tables WHERE table='02725_memory_for_merges' and database=currentDatabase()) as uuid
-SELECT sum(peak_memory_usage) < 1024 * 1024 * 200 from system.part_log where table_uuid=uuid and event_type='MergeParts';
+SELECT (sum(peak_memory_usage) < 1024 * 1024 * 200 AS x) ? x : sum(peak_memory_usage) from system.part_log where table_uuid=uuid and event_type='MergeParts';

 DROP TABLE IF EXISTS 02725_memory_for_merges SYNC;
--- a/tests/queries/0_stateless/02732_rename_after_processing.reference
+++ b/tests/queries/0_stateless/02732_rename_after_processing.reference
@ -19,3 +19,6 @@ OK
 tmp5.csv
 OK
 tmp5.csv
+4
+tmp6.csv.processed
+!tmp6.csv
--- a/tests/queries/0_stateless/02732_rename_after_processing.sh
+++ b/tests/queries/0_stateless/02732_rename_after_processing.sh
@ -29,6 +29,7 @@ cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp3_1.csv
 cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp3_2.csv
 cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp4.csv
 cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp5.csv
+cp ${tmp_dir}/tmp.csv ${tmp_dir}/tmp6.csv

 ### Checking that renaming works

@ -115,5 +116,14 @@ if [ -e "${tmp_dir}/tmp5.csv" ]; then
    echo "tmp5.csv"
 fi

+# check full file name placeholder
+${CLICKHOUSE_CLIENT} --rename-files-after-processing="%a.processed" -q "SELECT COUNT(*) FROM file('${unique_name}/tmp6.csv')"
+if [ -e "${tmp_dir}/tmp6.csv.processed" ]; then
+  echo "tmp6.csv.processed"
+fi
+if [ ! -e "${tmp_dir}/tmp6.csv" ]; then
+    echo "!tmp6.csv"
+fi
+
 # Clean
 rm -rd $tmp_dir
--- a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.reference
--- a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug.sql
@ -0,0 +1,133 @@
+drop table if exists test;
+drop table if exists test1;
+
+CREATE TABLE test
+(
+    `pt` String,
+    `count_distinct_exposure_uv` AggregateFunction(uniqHLL12, Int64)
+)
+ENGINE = AggregatingMergeTree
+ORDER BY pt;
+
+SELECT  *
+FROM
+(
+        SELECT  m0.pt                                                                                                                   AS pt
+               ,m0.`exposure_uv`                                                                                                        AS exposure_uv
+               ,round(m2.exposure_uv,4)                                                                                                 AS exposure_uv_hb_last_value
+               ,if(m2.exposure_uv IS NULL OR m2.exposure_uv = 0,NULL,round((m0.exposure_uv - m2.exposure_uv) * 1.0 / m2.exposure_uv,4)) AS exposure_uv_hb_diff_percent
+               ,round(m1.exposure_uv,4)                                                                                                 AS exposure_uv_tb_last_value
+               ,if(m1.exposure_uv IS NULL OR m1.exposure_uv = 0,NULL,round((m0.exposure_uv - m1.exposure_uv) * 1.0 / m1.exposure_uv,4)) AS exposure_uv_tb_diff_percent
+        FROM
+        (
+                SELECT  m0.pt                          AS pt
+                       ,`exposure_uv`                  AS `exposure_uv`
+                FROM
+                (
+                        SELECT  pt                                                                     AS pt
+                               ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`)  ELSE 0 END AS `exposure_uv`
+                        FROM
+                        (
+                                SELECT  pt                                         AS pt
+                                       ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv`
+                                FROM test
+                                GROUP BY  pt
+                        ) m
+                        GROUP BY  pt
+                ) m0
+        ) m0
+        LEFT JOIN
+        (
+                SELECT  m0.pt                          AS pt
+                       ,`exposure_uv`                  AS `exposure_uv`
+                FROM
+                (
+                        SELECT  formatDateTime(addYears(parseDateTimeBestEffort(pt),1),'%Y%m%d')       AS pt
+                               ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`)  ELSE 0 END AS `exposure_uv`
+                        FROM
+                        (
+                                SELECT  pt                                         AS pt
+                                       ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv`
+                                FROM test
+                                GROUP BY  pt
+                        ) m
+                        GROUP BY  pt
+                ) m0
+        ) m1
+        ON m0.pt = m1.pt
+        LEFT JOIN
+        (
+                SELECT  m0.pt                          AS pt
+                       ,`exposure_uv`                  AS `exposure_uv`
+                FROM
+                (
+                        SELECT  formatDateTime(addDays(toDate(parseDateTimeBestEffort(pt)),1),'%Y%m%d') AS pt
+                               ,CASE WHEN COUNT(`exposure_uv`) > 0 THEN AVG(`exposure_uv`)  ELSE 0 END  AS `exposure_uv`
+                        FROM
+                        (
+                                SELECT  pt                                         AS pt
+                                       ,uniqHLL12Merge(count_distinct_exposure_uv) AS `exposure_uv`
+                                FROM test
+                                GROUP BY  pt
+                        ) m
+                        GROUP BY  pt
+                ) m0
+        ) m2
+        ON m0.pt = m2.pt
+) c0
+ORDER BY pt ASC, exposure_uv DESC
+settings join_use_nulls = 1;
+
+CREATE TABLE test1
+(
+    `pt` String,
+    `exposure_uv` Float64
+)
+ENGINE = Memory;
+
+SELECT  *
+FROM
+(
+        SELECT  m0.pt
+               ,m0.exposure_uv AS exposure_uv
+               ,round(m2.exposure_uv,4)
+        FROM
+        (
+                SELECT  pt
+                       ,exposure_uv
+                FROM test1
+        ) m0
+        LEFT JOIN
+        (
+                SELECT  pt
+                       ,exposure_uv
+                FROM test1
+        ) m1
+        ON m0.pt = m1.pt
+        LEFT JOIN
+        (
+                SELECT  pt
+                        ,exposure_uv
+                FROM test1
+        ) m2
+        ON m0.pt = m2.pt
+) c0
+ORDER BY exposure_uv
+settings join_use_nulls = 1;
+
+SELECT
+    pt AS pt,
+    exposure_uv AS exposure_uv
+FROM
+(
+    SELECT
+        pt
+    FROM test1
+) AS m0
+FULL OUTER JOIN
+(
+    SELECT
+        pt,
+        exposure_uv
+    FROM test1
+) AS m1 ON m0.pt = m1.pt;
--- a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.reference
@ -0,0 +1,3 @@
+20230626	0.3156979034107179	\N	\N
+20230626	0.2624629016490004	\N	\N
+20230626	0.19390556368960468	\N	\N
--- a/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql
+++ b/tests/queries/0_stateless/02789_functions_after_sorting_and_columns_with_same_names_bug_2.sql
@ -0,0 +1,107 @@
+create table test1 (
+    `pt` String,
+    `brand_name` String,
+    `total_indirect_order_cnt` Float64,
+    `total_indirect_gmv` Float64
+) ENGINE = Memory;
+
+create table test2 (
+    `pt` String,
+    `brand_name` String,
+    `exposure_uv` Float64,
+    `click_uv` Float64
+) ENGINE = Memory;
+
+INSERT INTO test1 (`pt`, `brand_name`, `total_indirect_order_cnt`, `total_indirect_gmv`) VALUES ('20230625', 'LINING', 2232, 1008710), ('20230625', 'adidas', 125, 58820), ('20230625', 'Nike', 1291, 1033020), ('20230626', 'Nike', 1145, 938926), ('20230626', 'LINING', 1904, 853336), ('20230626', 'adidas', 133, 62546), ('20220626', 'LINING', 3747, 1855203), ('20220626', 'Nike', 2295, 1742665), ('20220626', 'adidas', 302, 122388);
+
+INSERT INTO test2 (`pt`, `brand_name`, `exposure_uv`, `click_uv`) VALUES ('20230625', 'Nike', 2012913, 612831),  ('20230625', 'adidas', 480277, 96176), ('20230625', 'LINING', 2474234, 627814), ('20230626', 'Nike', 1934666, 610770), ('20230626', 'adidas', 469904, 91117), ('20230626', 'LINING', 2285142, 599765), ('20220626', 'Nike', 2979656, 937166), ('20220626', 'adidas', 704751, 124250), ('20220626', 'LINING', 3163884, 1010221);
+
+SELECT * FROM (
+        SELECT  m0.pt                                                                                     AS pt
+               ,m0.`uvctr`                                                                                AS uvctr
+               ,round(m1.uvctr,4)                                                                         AS uvctr_hb_last_value
+               ,round(m2.uvctr,4)                                                                         AS uvctr_tb_last_value
+        FROM
+        (
+                SELECT  m0.pt                                                                                                           AS pt
+                       ,COALESCE(m0.brand_name,m1.brand_name)                                                                           AS brand_name
+                       ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr`
+                FROM
+                (
+                                SELECT  pt          AS pt
+                                       ,brand_name  AS `brand_name`
+                                       ,exposure_uv AS `exposure_uv`
+                                       ,click_uv    AS `click_uv`
+                                FROM test2
+                                WHERE pt = '20230626'
+                ) m0
+                FULL JOIN
+                (
+                                SELECT  pt                        AS pt
+                                       ,brand_name                AS `brand_name`
+                                       ,total_indirect_order_cnt  AS `total_indirect_order_cnt`
+                                       ,total_indirect_gmv        AS `total_indirect_gmv`
+                                FROM test1
+                                WHERE pt = '20230626'
+                ) m1
+                ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt
+        ) m0
+        LEFT JOIN
+        (
+                SELECT  m0.pt AS pt
+                       ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr`
+                       ,COALESCE(m0.brand_name,m1.brand_name)                                                                 AS brand_name
+                       ,`exposure_uv`                                                                                         AS `exposure_uv`
+                       ,`click_uv`
+                FROM
+                (
+                                SELECT  pt          AS pt
+                                       ,brand_name  AS `brand_name`
+                                       ,exposure_uv AS `exposure_uv`
+                                       ,click_uv    AS `click_uv`
+                                FROM test2
+                                WHERE pt = '20230625'
+                ) m0
+                FULL JOIN
+                (
+                                SELECT  pt                       AS pt
+                                       ,brand_name               AS `brand_name`
+                                       ,total_indirect_order_cnt AS `total_indirect_order_cnt`
+                                       ,total_indirect_gmv       AS `total_indirect_gmv`
+                                FROM test1
+                                WHERE pt = '20230625'
+                ) m1
+                ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt
+        ) m1
+        ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt
+        LEFT JOIN
+        (
+                SELECT  m0.pt AS pt
+                       ,if(isNaN(`click_uv` / `exposure_uv`) OR isInfinite(`click_uv` / `exposure_uv`),NULL,`click_uv` / `exposure_uv`) AS `uvctr`
+                       ,COALESCE(m0.brand_name,m1.brand_name)                                                                 AS brand_name
+                       ,`exposure_uv`                                                                                         AS `exposure_uv`
+                       ,`click_uv`
+                FROM
+                (
+                                SELECT  pt          AS pt
+                                       ,brand_name  AS `brand_name`
+                                       ,exposure_uv AS `exposure_uv`
+                                       ,click_uv    AS `click_uv`
+                                FROM test2
+                                WHERE pt = '20220626'
+                ) m0
+                FULL JOIN
+                (
+                                SELECT  pt                        AS pt
+                                       ,brand_name                AS `brand_name`
+                                       ,total_indirect_order_cnt  AS `total_indirect_order_cnt`
+                                       ,total_indirect_gmv        AS `total_indirect_gmv`
+                                FROM test1
+                                WHERE pt = '20220626'
+                ) m1
+                ON m0.brand_name = m1.brand_name AND m0.pt = m1.pt
+        ) m2
+        ON m0.brand_name = m2.brand_name AND m0.pt = m2.pt
+) c0
+ORDER BY pt ASC, uvctr DESC;
+
--- a/tests/queries/0_stateless/02810_initcap.reference
+++ b/tests/queries/0_stateless/02810_initcap.reference
@ -0,0 +1,13 @@
+
+Hello
+Hello
+Hello World
+Yeah, Well, I`M Gonna Go Build My Own Theme Park
+Crc32ieee Is The Best Function
+42ok
+
+Hello
+Yeah, Well, I`M Gonna Go Build My Own Theme Park
+Привет, Как Дела?
+Ätsch, Bätsch
+We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ.
--- a/tests/queries/0_stateless/02810_initcap.sql
+++ b/tests/queries/0_stateless/02810_initcap.sql
@ -0,0 +1,14 @@
+select initcap('');
+select initcap('Hello');
+select initcap('hello');
+select initcap('hello world');
+select initcap('yeah, well, i`m gonna go build my own theme park');
+select initcap('CRC32IEEE is the best function');
+select initcap('42oK');
+
+select initcapUTF8('');
+select initcapUTF8('Hello');
+select initcapUTF8('yeah, well, i`m gonna go build my own theme park');
+select initcapUTF8('привет, как дела?');
+select initcapUTF8('ätsch, bätsch');
+select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.');
--- a/tests/queries/0_stateless/02811_insert_schema_inference.reference
+++ b/tests/queries/0_stateless/02811_insert_schema_inference.reference
--- a/tests/queries/0_stateless/02811_insert_schema_inference.sql
+++ b/tests/queries/0_stateless/02811_insert_schema_inference.sql
@ -0,0 +1,9 @@
+drop table if exists test;
+create table test
+(
+   n1 UInt32,
+   n2 UInt32 alias murmurHash3_32(n1),
+   n3 UInt32 materialized n2 + 1
+)engine=MergeTree order by n1;
+insert into test select * from generateRandom() limit 10;
+drop table test;
--- a/tests/queries/0_stateless/02813_array_concat_agg.reference
+++ b/tests/queries/0_stateless/02813_array_concat_agg.reference
@ -0,0 +1,5 @@
+[1,2,3,4,5,6]
+[1,2,3,4,5,6]
+1	[1,2,3]
+2	[4,5]
+3	[6]
--- a/tests/queries/0_stateless/02813_array_concat_agg.sql
+++ b/tests/queries/0_stateless/02813_array_concat_agg.sql
@ -0,0 +1,9 @@
+drop table if exists t;
+
+create table t (n UInt32, a Array(Int32)) engine=Memory;
+insert into t values (1, [1,2,3]), (2, [4,5]), (3, [6]);
+
+select array_concat_agg(a) from t;
+select ArrAy_cOncAt_aGg(a) from t;
+select n, array_concat_agg(a) from t group by n order by n;
+drop table t;
--- a/tests/queries/0_stateless/02813_create_index_noop.reference
+++ b/tests/queries/0_stateless/02813_create_index_noop.reference
--- a/tests/queries/0_stateless/02813_create_index_noop.sql
+++ b/tests/queries/0_stateless/02813_create_index_noop.sql
--- a/tests/queries/1_stateful/00172_early_constant_folding.sql
+++ b/tests/queries/1_stateful/00172_early_constant_folding.sql
@ -1,4 +1,5 @@
 -- Tags: no-parallel-replicas

 set max_threads=10;
+set optimize_use_implicit_projections=1;
 EXPLAIN PIPELINE SELECT count(JavaEnable) FROM test.hits WHERE WatchID = 1 OR Title = 'next' OR URL = 'prev' OR URL = '???' OR 1;
--- a/tests/sqllogic/connection.py
+++ b/tests/sqllogic/connection.py
@ -62,7 +62,7 @@ def default_clickhouse_odbc_conn_str():
    return str(
        OdbcConnectingArgs.create_from_kw(
            dsn="ClickHouse DSN (ANSI)",
-            Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1",
+            Url="http://localhost:8123/query?default_format=ODBCDriver2&default_table_engine=MergeTree&union_default_mode=DISTINCT&group_by_use_nulls=1&join_use_nulls=1&allow_create_index_without_type=1",
        )
    )

--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@ -991,6 +991,7 @@ addressToLine
 addressToLineWithInlines
 addressToSymbol
 adviced
+agg
 aggregatefunction
 aggregatingmergetree
 aggregatio
@ -1582,6 +1583,8 @@ indexOf
 infi
 initialQueryID
 initializeAggregation
+initcap
+initcapUTF
 injective
 innogames
 inodes
--- a/utils/self-extracting-executable/decompressor.cpp
+++ b/utils/self-extracting-executable/decompressor.cpp
@ -362,11 +362,12 @@ int decompressFiles(int input_fd, char * path, char * name, bool & have_compress

 #else

-    int read_exe_path(char *exe, size_t/* buf_sz*/)
+    int read_exe_path(char *exe, size_t buf_sz)
    {
-        if (realpath("/proc/self/exe", exe) == nullptr)
-            return 1;
-        return 0;
+        ssize_t n = readlink("/proc/self/exe", exe, buf_sz - 1);
+        if (n > 0)
+            exe[n] = '\0';
+        return n > 0 && n < static_cast<ssize_t>(buf_sz);
    }

 #endif
@ -430,20 +431,18 @@ int main(int/* argc*/, char* argv[])
        return 1;
    }

-    int lock = -1;
-    /// Protection from double decompression
 #if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
    /// get inode of this executable
    uint64_t inode = getInode(self);
-    /// In some cases /proc/self/maps may not contain the inode for the
-    /// /proc/self/exe, one of such examples are using qemu-*-static, in this
-    /// case maps will be proxied through the qemu, and it will remove
-    /// information about itself from it.
-    if (inode != 0)
+    if (inode == 0)
    {
+        std::cerr << "Unable to obtain inode for exe '" << self << "'." << std::endl;
+        return 1;
+    }
+
    std::stringstream lock_path; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
    lock_path << "/tmp/" << name << ".decompression." << inode << ".lock";
-        lock = open(lock_path.str().c_str(), O_CREAT | O_RDWR, 0666);
+    int lock = open(lock_path.str().c_str(), O_CREAT | O_RDWR, 0666);
    if (lock < 0)
    {
        perror("lock open");
@ -482,7 +481,6 @@ int main(int/* argc*/, char* argv[])
        printf("No target executable - decompression only was performed.\n");
        return 0;
    }
-    }
 #endif

    int input_fd = open(self, O_RDONLY);
@ -549,19 +547,21 @@ int main(int/* argc*/, char* argv[])

        if (has_exec)
        {
+#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
            /// write one byte to the lock in case other copies of compressed are running to indicate that
            /// execution should be performed
-            if (lock >= 0)
            write(lock, "1", 1);
+#endif
            execv(self, argv);

            /// This part of code will be reached only if error happened
            perror("execv");
            return 1;
        }
+#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
        /// since inodes can be reused - it's a precaution if lock file already exists and have size of 1
-        if (lock >= 0)
        ftruncate(lock, 0);
+#endif

        printf("No target executable - decompression only was performed.\n");
    }