Merge branch 'ClickHouse:master' into master

iceFireser 2024-08-24 17:18:54 +08:00 committed by GitHub
commit 6afe3fc500
35 changed files with 488 additions and 303 deletions

View File

@ -80,7 +80,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
`PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key.
It is usually not necessary to specify the primary key in addition to the primary key.
It is usually not necessary to specify the primary key in addition to the sorting key.
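For illustration (hypothetical table and column names): specifying a sorting key alone is sufficient, because the primary key defaults to it.

CREATE TABLE events
(
    id UInt64,
    ts DateTime
)
ENGINE = MergeTree
ORDER BY (id, ts); -- the primary key is implicitly (id, ts); no separate PRIMARY KEY clause is needed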
#### SAMPLE BY

View File

@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO
SubscriptionsOnRoles new_subscriptions_on_roles;
new_subscriptions_on_roles.reserve(subscriptions_on_roles.size());
auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); };
auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); };
for (const auto & current_role : enabled_roles.params.current_roles)
collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false);

View File

@ -72,11 +72,13 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"24.9",
{
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"create_if_not_exists", false, false, "New setting."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
}
},
{"24.8",
{
{"create_if_not_exists", false, false, "New setting."},
{"rows_before_aggregation", true, true, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
{"restore_replace_external_table_functions_to_null", false, false, "New setting."},
{"restore_replace_external_engines_to_null", false, false, "New setting."},
@ -85,7 +87,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"use_hive_partitioning", false, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines."},
{"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"},
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"query_cache_tag", "", "", "New setting for labeling query cache settings."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
@ -93,7 +94,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_experimental_json_type", false, false, "Add new experimental JSON type"},
{"use_json_alias_for_old_object_type", true, false, "Use JSON type alias to create new JSON type"},
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"}
}

View File

@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
&& ast_to_str(ttl) == ast_to_str(other.ttl);
}
void ColumnDescription::writeText(WriteBuffer & buf) const
String formatASTStateAware(IAST & ast, IAST::FormatState & state)
{
WriteBufferFromOwnString buf;
IAST::FormatSettings settings(buf, true, false);
ast.formatImpl(settings, state, IAST::FormatStateStacked());
return buf.str();
}
void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const
{
/// NOTE: Serialization format is insane.
@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
writeChar('\t', buf);
DB::writeText(DB::toString(default_desc.kind), buf);
writeChar('\t', buf);
writeEscapedString(queryToString(default_desc.expression), buf);
writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf);
}
if (!comment.empty())
if (!comment.empty() && include_comment)
{
writeChar('\t', buf);
DB::writeText("COMMENT ", buf);
writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf);
auto ast = ASTLiteral(Field(comment));
writeEscapedString(formatASTStateAware(ast, state), buf);
}
if (codec)
{
writeChar('\t', buf);
writeEscapedString(queryToString(codec), buf);
writeEscapedString(formatASTStateAware(*codec, state), buf);
}
if (!settings.empty())
@ -150,21 +159,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
ASTSetQuery ast;
ast.is_standalone = false;
ast.changes = settings;
writeEscapedString(queryToString(ast), buf);
writeEscapedString(formatASTStateAware(ast, state), buf);
DB::writeText(")", buf);
}
if (!statistics.empty())
{
writeChar('\t', buf);
writeEscapedString(queryToString(statistics.getAST()), buf);
writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf);
}
if (ttl)
{
writeChar('\t', buf);
DB::writeText("TTL ", buf);
writeEscapedString(queryToString(ttl), buf);
writeEscapedString(formatASTStateAware(*ttl, state), buf);
}
writeChar('\n', buf);
@ -895,16 +904,17 @@ void ColumnsDescription::resetColumnTTLs()
}
String ColumnsDescription::toString() const
String ColumnsDescription::toString(bool include_comments) const
{
WriteBufferFromOwnString buf;
IAST::FormatState ast_format_state;
writeCString("columns format version: 1\n", buf);
DB::writeText(columns.size(), buf);
writeCString(" columns:\n", buf);
for (const ColumnDescription & column : columns)
column.writeText(buf);
column.writeText(buf, ast_format_state, include_comments);
return buf.str();
}
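The reason for threading a single IAST::FormatState through every column's writeText: IAST formatting tracks aliases it has already printed, so an alias expression such as `expr AS event` that appears in several column defaults serializes consistently, keeping the column metadata written to ZooKeeper identical to the metadata written on disk. The DDL from the integration test added later in this commit exercises exactly this case (table name and ZooKeeper path taken from that test):

CREATE TABLE t
(
    data String,
    col1 String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'),
    col2 String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key')
)
ENGINE = ReplicatedMergeTree('/test/t', '{replica}')
ORDER BY col1;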

View File

@ -104,7 +104,7 @@ struct ColumnDescription
bool operator==(const ColumnDescription & other) const;
bool operator!=(const ColumnDescription & other) const { return !(*this == other); }
void writeText(WriteBuffer & buf) const;
void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const;
void readText(ReadBuffer & buf);
};
@ -137,7 +137,7 @@ public:
/// NOTE Must correspond with Nested::flatten function.
void flattenNested(); /// TODO: remove, insert already flattened Nested columns.
bool operator==(const ColumnsDescription & other) const { return columns == other.columns; }
bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); }
bool operator!=(const ColumnsDescription & other) const { return !(*this == other); }
auto begin() const { return columns.begin(); }
@ -221,7 +221,7 @@ public:
/// Does column has non default specified compression codec
bool hasCompressionCodec(const String & column_name) const;
String toString() const;
String toString(bool include_comments = true) const;
static ColumnsDescription parse(const String & str);
size_t size() const

View File

@ -444,8 +444,8 @@ StorageHive::StorageHive(
storage_metadata.setComment(comment_);
storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext()));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext()));
}
void StorageHive::lazyInitialize()

View File

@ -94,7 +94,7 @@ StorageObjectStorage::StorageObjectStorage(
if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning)
sample_path = getPathSample(metadata, context);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings));
setInMemoryMetadata(metadata);
}

View File

@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster(
if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning)
sample_path = getPathSample(metadata, context_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path));
setInMemoryMetadata(metadata);
}

View File

@ -208,7 +208,7 @@ Chunk StorageObjectStorageSource::generate()
.filename = &filename,
.last_modified = object_info->metadata->last_modified,
.etag = &(object_info->metadata->etag)
}, getContext(), read_from_format_info.columns_description);
}, getContext());
const auto & partition_columns = configuration->getPartitionColumns();
if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
@ -280,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
const std::shared_ptr<IIterator> & file_iterator,
const ConfigurationPtr & configuration,
const ObjectStoragePtr & object_storage,
const ReadFromFormatInfo & read_from_format_info,
ReadFromFormatInfo & read_from_format_info,
const std::optional<FormatSettings> & format_settings,
const std::shared_ptr<const KeyCondition> & key_condition_,
const ContextPtr & context_,

View File

@ -74,7 +74,7 @@ protected:
const UInt64 max_block_size;
const bool need_only_count;
const size_t max_parsing_threads;
const ReadFromFormatInfo read_from_format_info;
ReadFromFormatInfo read_from_format_info;
const std::shared_ptr<ThreadPool> create_reader_pool;
std::shared_ptr<IIterator> file_iterator;
@ -122,7 +122,7 @@ protected:
const std::shared_ptr<IIterator> & file_iterator,
const ConfigurationPtr & configuration,
const ObjectStoragePtr & object_storage,
const ReadFromFormatInfo & read_from_format_info,
ReadFromFormatInfo & read_from_format_info,
const std::optional<FormatSettings> & format_settings,
const std::shared_ptr<const KeyCondition> & key_condition_,
const ContextPtr & context_,

View File

@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl()
{
.path = path,
.size = reader.getObjectInfo()->metadata->size_bytes
}, getContext(), read_from_format_info.columns_description);
}, getContext());
return chunk;
}

View File

@ -128,7 +128,7 @@ private:
const std::shared_ptr<FileIterator> file_iterator;
const ConfigurationPtr configuration;
const ObjectStoragePtr object_storage;
const ReadFromFormatInfo read_from_format_info;
ReadFromFormatInfo read_from_format_info;
const std::optional<FormatSettings> format_settings;
const ObjectStorageQueueSettings queue_settings;
const std::shared_ptr<ObjectStorageQueueMetadata> files_metadata;

View File

@ -169,7 +169,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
storage_metadata.setColumns(columns);
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_));
setInMemoryMetadata(storage_metadata);
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());

View File

@ -1112,9 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args)
storage_metadata.setConstraints(args.constraints);
storage_metadata.setComment(args.comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings));
setInMemoryMetadata(storage_metadata);
}
@ -1468,7 +1468,7 @@ Chunk StorageFileSource::generate()
.size = current_file_size,
.filename = (filename_override.has_value() ? &filename_override.value() : nullptr),
.last_modified = current_file_last_modified
}, getContext(), columns_description);
}, getContext());
return chunk;
}

View File

@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster(
}
storage_metadata.setConstraints(constraints_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0]));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? "" : paths[0]));
}
void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)

View File

@ -6340,7 +6340,7 @@ void StorageReplicatedMergeTree::alter(
"Metadata on replica is not up to date with common metadata in Zookeeper. "
"It means that this replica still not applied some of previous alters."
" Probably too many alters executing concurrently (highly not recommended). "
"You can retry the query");
"You can retry this error");
/// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level.
if (query_context->getZooKeeperMetadataTransaction())

View File

@ -165,9 +165,9 @@ IStorageURLBase::IStorageURLBase(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings));
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings));
setInMemoryMetadata(storage_metadata);
}
@ -435,7 +435,7 @@ Chunk StorageURLSource::generate()
{
.path = curr_uri.getPath(),
.size = current_file_size,
}, getContext(), columns_description);
}, getContext());
return chunk;
}

View File

@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster(
}
storage_metadata.setConstraints(constraints_);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context)));
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context)));
}
void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context)

View File

@ -129,36 +129,45 @@ NameSet getVirtualNamesForFileLikeStorage()
return {"_path", "_file", "_size", "_time", "_etag"};
}
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns)
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
{
std::string pattern = "([^/]+)=([^/]+)/";
re2::StringPiece input_piece(path);
std::unordered_map<std::string, std::string> key_values;
std::string key, value;
std::unordered_set<String> used_keys;
std::unordered_map<std::string, std::string> used_keys;
while (RE2::FindAndConsume(&input_piece, pattern, &key, &value))
{
if (used_keys.contains(key))
throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key);
used_keys.insert(key);
auto it = used_keys.find(key);
if (it != used_keys.end() && it->second != value)
throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key);
used_keys.insert({key, value});
auto col_name = "_" + key;
while (storage_columns.has(col_name))
col_name = "_" + col_name;
auto col_name = key;
key_values[col_name] = value;
}
return key_values;
}
VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
{
VirtualColumnsDescription desc;
auto add_virtual = [&](const auto & name, const auto & type)
{
if (storage_columns.has(name))
{
if (!context->getSettingsRef().use_hive_partitioning)
return;
if (storage_columns.size() == 1)
throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path);
auto local_type = storage_columns.get(name).type;
storage_columns.remove(name);
desc.addEphemeral(name, local_type, "");
return;
}
desc.addEphemeral(name, type, "");
};
@ -171,7 +180,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription
if (context->getSettingsRef().use_hive_partitioning)
{
auto map = parseHivePartitioningKeysAndValues(path, storage_columns);
auto map = parseHivePartitioningKeysAndValues(path);
auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context);
for (auto & item : map)
{
@ -244,11 +253,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
void addRequestedFileLikeStorageVirtualsToChunk(
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns)
VirtualsForFileLikeStorage virtual_values, ContextPtr context)
{
std::unordered_map<std::string, std::string> hive_map;
if (context->getSettingsRef().use_hive_partitioning)
hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns);
hive_map = parseHivePartitioningKeysAndValues(virtual_values.path);
for (const auto & virtual_column : requested_virtual_columns)
{

View File

@ -70,7 +70,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name)
NameSet getVirtualNamesForFileLikeStorage();
VirtualColumnsDescription getVirtualsForFileLikeStorage(
const ColumnsDescription & storage_columns,
ColumnsDescription & storage_columns,
const ContextPtr & context,
const std::string & sample_path = "",
std::optional<FormatSettings> format_settings_ = std::nullopt);
@ -105,7 +105,7 @@ struct VirtualsForFileLikeStorage
void addRequestedFileLikeStorageVirtualsToChunk(
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns);
VirtualsForFileLikeStorage virtual_values, ContextPtr context);
}
}
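The user-visible effect of these VirtualColumnUtils changes, adapted from the tests updated later in this commit (paths shortened): hive-style partition keys are now exposed under their plain names instead of as underscore-prefixed virtual columns, and a path may repeat a partition key only with an identical value.

SET use_hive_partitioning = 1;
-- the partition key 'column0' is read by its own name; a '_column0' virtual column is no longer created:
SELECT *, column0 FROM file('data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
-- repeating a key with a different value in the path now throws INCORRECT_DATA:
SELECT * FROM file('data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet');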

View File

@ -60,7 +60,6 @@ MESSAGES_TO_RETRY = [
"is already started to be removing by another replica right now",
# This is from LSan, and it indicates its own internal problem:
"Unable to get registers from thread",
"You can retry",
]
MAX_RETRIES = 3

View File

@ -0,0 +1,26 @@
<clickhouse>
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<session_timeout_ms>20000</session_timeout_ms>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
<zookeeper>
<node index="1">
<host>localhost</host>
<port>2181</port>
</node>
<session_timeout_ms>20000</session_timeout_ms>
</zookeeper>
</clickhouse>

View File

@ -0,0 +1,8 @@
<clickhouse>
<users>
<default>
<profile>default</profile>
<no_password></no_password>
</default>
</users>
</clickhouse>

View File

@ -0,0 +1,71 @@
import pytest
import random
import string
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=[
"config/enable_keeper.xml",
"config/users.xml",
],
stay_alive=True,
with_minio=True,
macros={"shard": 1, "replica": 1},
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def randomize_table_name(table_name, random_suffix_length=10):
letters = string.ascii_letters + string.digits
return f"{table_name}_{''.join(random.choice(letters) for _ in range(random_suffix_length))}"
@pytest.mark.parametrize("engine", ["ReplicatedMergeTree"])
def test_aliases_in_default_expr_not_break_table_structure(start_cluster, engine):
"""
Make sure that using aliases in column default expressions does not lead to different column metadata in ZooKeeper and on disk.
Issue: https://github.com/ClickHouse/clickhouse-private/issues/5150
"""
data = '{"event": {"col1-key": "col1-val", "col2-key": "col2-val"}}'
table_name = randomize_table_name("t")
node.query(
f"""
DROP TABLE IF EXISTS {table_name};
CREATE TABLE {table_name}
(
`data` String,
`col1` String DEFAULT JSONExtractString(JSONExtractString(data, 'event') AS event, 'col1-key'),
`col2` String MATERIALIZED JSONExtractString(JSONExtractString(data, 'event') AS event, 'col2-key')
)
ENGINE = {engine}('/test/{table_name}', '{{replica}}')
ORDER BY col1
"""
)
node.restart_clickhouse()
node.query(
f"""
INSERT INTO {table_name} (data) VALUES ('{data}');
"""
)
assert node.query(f"SELECT data FROM {table_name}").strip() == data
assert node.query(f"SELECT col1 FROM {table_name}").strip() == "col1-val"
assert node.query(f"SELECT col2 FROM {table_name}").strip() == "col2-val"
node.query(f"DROP TABLE {table_name}")

View File

@ -1,5 +1,6 @@
import time
import pytest
import random
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV
@ -418,72 +419,215 @@ def test_function_current_roles():
)
def test_role_expiration():
instance.query("CREATE USER ure")
@pytest.mark.parametrize("with_extra_role", [False, True])
def test_role_expiration(with_extra_role):
instance.query("CREATE ROLE rre")
instance.query("GRANT rre TO ure")
instance.query("CREATE USER ure DEFAULT ROLE rre")
instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log")
instance.query("INSERT INTO tre VALUES (0)")
instance.query("CREATE TABLE table1 (id Int) Engine=Log")
instance.query("CREATE TABLE table2 (id Int) Engine=Log")
instance.query("INSERT INTO table1 VALUES (1)")
instance.query("INSERT INTO table2 VALUES (2)")
instance.query("GRANT SELECT ON table1 TO rre")
assert instance.query("SELECT * FROM table1", user="ure") == "1\n"
assert "Not enough privileges" in instance.query_and_get_error(
"SELECT * FROM tre", user="ure"
"SELECT * FROM table2", user="ure"
)
instance.query("GRANT SELECT ON tre TO rre")
assert instance.query("SELECT * FROM tre", user="ure") == "0\n"
# access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test
# so we wait >2 seconds until the role is expired
time.sleep(5)
instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log")
instance.query("INSERT INTO tre1 VALUES (0)")
instance.query("GRANT SELECT ON tre1 TO rre")
if with_extra_role:
# Expiration of role "rre" from the role cache can be caused by another role being used.
instance.query("CREATE ROLE extra_role")
instance.query("CREATE USER extra_user DEFAULT ROLE extra_role")
instance.query("GRANT SELECT ON table1 TO extra_role")
assert instance.query("SELECT * FROM table1", user="extra_user") == "1\n"
assert instance.query("SELECT * from tre1", user="ure") == "0\n"
instance.query("GRANT SELECT ON table2 TO rre")
assert instance.query("SELECT * FROM table1", user="ure") == "1\n"
assert instance.query("SELECT * FROM table2", user="ure") == "2\n"
instance.query("DROP USER ure")
instance.query("DROP ROLE rre")
instance.query("DROP TABLE tre")
instance.query("DROP TABLE tre1")
instance.query("DROP USER ure")
instance.query("DROP TABLE table1")
instance.query("DROP TABLE table2")
if with_extra_role:
instance.query("DROP ROLE extra_role")
instance.query("DROP USER extra_user")
def test_two_roles_expiration():
instance.query("CREATE USER ure")
instance.query("CREATE ROLE rre")
instance.query("GRANT rre TO ure")
def test_roles_cache():
# This test takes 20 seconds.
test_time = 20
instance.query("CREATE ROLE rre_second")
instance.query("CREATE TABLE IF NOT EXISTS tre (id Int) Engine=Log")
instance.query("INSERT INTO tre VALUES (0)")
assert "Not enough privileges" in instance.query_and_get_error(
"SELECT * FROM tre", user="ure"
)
instance.query("GRANT SELECT ON tre TO rre")
assert instance.query("SELECT * FROM tre", user="ure") == "0\n"
# access_control_improvements/role_cache_expiration_time_seconds value is 2 for the test
# so we wait >2 seconds until the roles are expired
time.sleep(5)
# Three users A, B, C.
users = ["A", "B", "C"]
instance.query("CREATE USER " + ", ".join(users))
# Table "tbl" has 10 columns. Each of the users has access to a different set of columns.
num_columns = 10
columns = [f"x{i}" for i in range(1, num_columns + 1)]
columns_with_types = [column + " Int64" for column in columns]
columns_with_types_comma_separated = ", ".join(columns_with_types)
values = list(range(1, num_columns + 1))
values_comma_separated = ", ".join([str(value) for value in values])
instance.query(
"GRANT SELECT ON tre1 TO rre_second"
) # we expect that both rre and rre_second are gone from cache upon this operation
f"CREATE TABLE tbl ({columns_with_types_comma_separated}) ENGINE=MergeTree ORDER BY tuple()"
)
instance.query(f"INSERT INTO tbl VALUES ({values_comma_separated})")
columns_to_values = dict([(f"x{i}", i) for i in range(1, num_columns + 1)])
instance.query("CREATE TABLE IF NOT EXISTS tre1 (id Int) Engine=Log")
instance.query("INSERT INTO tre1 VALUES (0)")
instance.query("GRANT SELECT ON tre1 TO rre")
# In this test we create and modify roles multiple times along with updating the following variables.
# Then we check that each of the users has access to the expected set of columns.
roles = []
users_to_roles = dict([(user, []) for user in users])
roles_to_columns = {}
assert instance.query("SELECT * from tre1", user="ure") == "0\n"
# Checks that each of the users can access the expected set of columns and can't access other columns.
def check():
for user in random.sample(users, len(users)):
expected_roles = users_to_roles[user]
expected_columns = list(
set(sum([roles_to_columns[role] for role in expected_roles], []))
)
expected_result = sorted(
[columns_to_values[column] for column in expected_columns]
)
query = " UNION ALL ".join(
[
f"SELECT * FROM viewIfPermitted(SELECT {column} AS c FROM tbl ELSE null('c Int64'))"
for column in columns
]
)
result = instance.query(query, user=user).splitlines()
result = sorted([int(value) for value in result])
ok = result == expected_result
if not ok:
print(f"Show grants for {user}:")
print(
instance.query(
"SHOW GRANTS FOR " + ", ".join([user] + expected_roles)
)
)
print(f"Expected result: {expected_result}")
print(f"Got unexpected result: {result}")
assert ok
instance.query("DROP USER ure")
instance.query("DROP ROLE rre")
instance.query("DROP ROLE rre_second")
instance.query("DROP TABLE tre")
instance.query("DROP TABLE tre1")
# Grants one of our roles a permission to access one of the columns.
def grant_column():
columns_used_in_roles = sum(roles_to_columns.values(), [])
columns_to_choose = [
column for column in columns if column not in columns_used_in_roles
]
if not columns_to_choose or not roles:
return False
column = random.choice(columns_to_choose)
role = random.choice(roles)
instance.query(f"GRANT SELECT({column}) ON tbl TO {role}")
roles_to_columns[role].append(column)
return True
# Revokes the permission to access one of the granted columns from all our roles.
def revoke_column():
columns_used_in_roles = sum(roles_to_columns.values(), [])
columns_to_choose = list(set(columns_used_in_roles))
if not columns_to_choose or not roles:
return False
column = random.choice(columns_to_choose)
roles_str = ", ".join(roles)
instance.query(f"REVOKE SELECT({column}) ON tbl FROM {roles_str}")
for role in roles_to_columns:
if column in roles_to_columns[role]:
roles_to_columns[role].remove(column)
return True
# Creates a role and grants it to one of the users.
def create_role():
for role in ["R1", "R2", "R3"]:
if role not in roles:
instance.query(f"CREATE ROLE {role}")
roles.append(role)
if role not in roles_to_columns:
roles_to_columns[role] = []
if "R1" not in users_to_roles["A"]:
instance.query("GRANT R1 TO A")
users_to_roles["A"].append("R1")
elif "R2" not in users_to_roles["B"]:
instance.query("GRANT R2 TO B")
users_to_roles["B"].append("R2")
elif "R3" not in users_to_roles["B"]:
instance.query("GRANT R3 TO R2")
users_to_roles["B"].append("R3")
elif "R3" not in users_to_roles["C"]:
instance.query("GRANT R3 TO C")
users_to_roles["C"].append("R3")
else:
return False
return True
# Drops one of our roles.
def drop_role():
if not roles:
return False
role = random.choice(roles)
instance.query(f"DROP ROLE {role}")
roles.remove(role)
for u in users_to_roles:
if role in users_to_roles[u]:
users_to_roles[u].remove(role)
del roles_to_columns[role]
if (role == "R2") and ("R3" in users_to_roles["B"]):
users_to_roles["B"].remove("R3")
return True
# Modifies some grants or roles randomly.
def modify():
while True:
rnd = random.random()
if rnd < 0.4:
if grant_column():
break
elif rnd < 0.5:
if revoke_column():
break
elif rnd < 0.9:
if create_role():
break
else:
if drop_role():
break
def maybe_modify():
if random.random() < 0.9:
modify()
modify()
# Sleeping is necessary in this test because the role cache in ClickHouse has an expiration timeout.
def maybe_sleep():
if random.random() < 0.1:
# "role_cache_expiration_time_seconds" is set to 2 seconds in the test configuration.
# We need a sleep longer than that in this test sometimes.
seconds = random.random() * 5
print(f"Sleeping {seconds} seconds")
time.sleep(seconds)
# Main part of the test.
start_time = time.time()
end_time = start_time + test_time
while time.time() < end_time:
check()
maybe_sleep()
maybe_modify()
maybe_sleep()
check()
instance.query("DROP USER " + ", ".join(users))
instance.query("DROP ROLE " + ", ".join(roles))
instance.query("DROP TABLE tbl")

View File

@ -1513,19 +1513,19 @@ def test_hive_partitioning_with_one_parameter(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, _column1 FROM azureBlobStorage(azure_conf2, "
f"SELECT column2, _file, _path, column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}')"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}')"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == [
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format(
"Gordon\tsample.csv\t{bucket}/{max_path}\tElizabeth".format(
bucket="cont", max_path=path
)
]
@ -1533,14 +1533,14 @@ def test_hive_partitioning_with_one_parameter(cluster):
query = (
f"SELECT column2 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Gordon"]
def test_hive_partitioning_with_two_parameters(cluster):
def test_hive_partitioning_with_all_parameters(cluster):
# type: (ClickHouseCluster) -> None
node = cluster.instances["node"] # type: ClickHouseInstance
table_format = "column1 String, column2 String"
@ -1551,40 +1551,19 @@ def test_hive_partitioning_with_two_parameters(cluster):
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == [
"Elizabeth\tGordon\tsample.csv\t{bucket}/{max_path}\tElizabeth\tGordon".format(
bucket="cont", max_path=path
)
]
pattern = r"DB::Exception: Cannot use hive partitioning for file"
query = (
f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2;"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Elizabeth"]
query = (
f"SELECT column1 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column2=_column2 AND column1=_column1;"
)
assert azure_query(
node, query, settings={"use_hive_partitioning": 1}
).splitlines() == ["Elizabeth"]
with pytest.raises(Exception, match=pattern):
azure_query(node, query, settings={"use_hive_partitioning": 1})
def test_hive_partitioning_without_setting(cluster):
@ -1593,19 +1572,19 @@ def test_hive_partitioning_without_setting(cluster):
table_format = "column1 String, column2 String"
values_1 = f"('Elizabeth', 'Gordon')"
values_2 = f"('Emilia', 'Gregor')"
path = "a/column1=Elizabeth/column2=Gordon/sample.csv"
path = "a/column1=Elizabeth/column2=Gordon/column3=Gordon/sample.csv"
azure_query(
node,
f"INSERT INTO TABLE FUNCTION azureBlobStorage(azure_conf2, storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}',"
f" container='cont', blob_path='{path}', format='CSV', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
f" container='cont', blob_path='{path}', format='CSVWithNames', compression='auto', structure='{table_format}') VALUES {values_1}, {values_2}",
settings={"azure_truncate_on_insert": 1},
)
query = (
f"SELECT column1, column2, _file, _path, _column1, _column2 FROM azureBlobStorage(azure_conf2, "
f"SELECT column1, column2, _file, _path, column3 FROM azureBlobStorage(azure_conf2, "
f"storage_account_url = '{cluster.env_variables['AZURITE_STORAGE_ACCOUNT_URL']}', container='cont', "
f"blob_path='{path}', format='CSV', structure='{table_format}') WHERE column1=_column1;"
f"blob_path='{path}', format='CSVWithNames', structure='{table_format}');"
)
pattern = re.compile(
r"DB::Exception: Unknown expression identifier '.*' in scope.*", re.DOTALL

View File

@ -1259,33 +1259,21 @@ def test_respect_object_existence_on_partitioned_write(started_cluster):
def test_hive_partitioning_with_one_parameter(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(f"/column0=Elizabeth/parquet_1", f"Elizabeth\tGordon\n")
assert hdfs_api.read_data(f"/column0=Elizabeth/parquet_1") == f"Elizabeth\tGordon\n"
hdfs_api.write_data(
f"/column0=Elizabeth/file_1", f"column0,column1\nElizabeth,Gordon\n"
)
assert (
hdfs_api.read_data(f"/column0=Elizabeth/file_1")
== f"column0,column1\nElizabeth,Gordon\n"
)
r = node1.query(
"SELECT _column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/parquet_1', 'TSV')",
"SELECT column0 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/file_1', 'CSVWithNames')",
settings={"use_hive_partitioning": 1},
)
assert r == f"Elizabeth\n"
def test_hive_partitioning_with_two_parameters(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(
f"/column0=Elizabeth/column1=Gordon/parquet_2", f"Elizabeth\tGordon\n"
)
assert (
hdfs_api.read_data(f"/column0=Elizabeth/column1=Gordon/parquet_2")
== f"Elizabeth\tGordon\n"
)
r = node1.query(
"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 1},
)
assert r == f"Gordon\n"
def test_hive_partitioning_without_setting(started_cluster):
hdfs_api = started_cluster.hdfs_api
hdfs_api.write_data(
@ -1301,7 +1289,7 @@ def test_hive_partitioning_without_setting(started_cluster):
with pytest.raises(QueryRuntimeException, match=pattern):
node1.query(
f"SELECT _column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
f"SELECT column1 FROM hdfs('hdfs://hdfs1:9000/column0=Elizabeth/column1=Gordon/parquet_2', 'TSV');",
settings={"use_hive_partitioning": 0},
)

View File

@ -26,6 +26,10 @@ while [[ $($CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='00000000
sleep 1
done
while [[ $($CLICKHOUSE_CLIENT --query "SELECT * FROM system.replication_queue WHERE type='ALTER_METADATA' AND database = '$CLICKHOUSE_DATABASE'" 2>&1) ]]; do
sleep 1
done
$CLICKHOUSE_CLIENT --query "SHOW CREATE TABLE table_for_bad_alters;" # Type changed, but we can revert back
$CLICKHOUSE_CLIENT --query "INSERT INTO table_for_bad_alters VALUES(2, 2, 7)"

View File

@ -23,11 +23,11 @@ $CLICKHOUSE_CLIENT --query "
DETACH TABLE r2;
"
$CLICKHOUSE_CLIENT --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})"
# insert_keeper_fault_injection_probability=0 -- can slow down inserts a lot (produce a lot of parts)
$CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 --max_block_size 1 --min_insert_block_size_rows 1 --min_insert_block_size_bytes 1 --max_insert_threads 16 --query "INSERT INTO r1 SELECT * FROM numbers_mt(${SCALE})"
# Now wait for cleanup thread
for _ in {1..60}; do
$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS"
[[ $($CLICKHOUSE_CLIENT --query "SELECT sum(toUInt32(extract(message, 'Removed (\d+) old log entries'))) FROM system.text_log WHERE event_date >= yesterday() AND logger_name LIKE '%' || '$CLICKHOUSE_DATABASE' || '%r1%(ReplicatedMergeTreeCleanupThread)%' AND message LIKE '%Removed % old log entries%'") -gt $((SCALE - 10)) ]] && break;

View File

@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage
# Tags: long, no-debug, no-asan, no-tsan, no-msan, no-ubsan, no-sanitize-coverage, no-distributed-cache
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -1,4 +1,14 @@
TESTING THE FILE HIVE PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -9,56 +19,36 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
42 2020-01-01
[1,2,3] 42.42
Array(Int64) LowCardinality(Float64)
101
2070
4081
2070
2070
b
1
1
TESTING THE URL PARTITIONING
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -71,6 +61,16 @@ Jeffery Delgado Elizabeth
Clara Cross Elizabeth
1
TESTING THE S3 PARTITIONING
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
@ -81,40 +81,35 @@ Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
Elizabeth Gordon Elizabeth Gordon
Elizabeth Gordon Elizabeth
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
OK
TESTING THE S3CLUSTER PARTITIONING
first last Elizabeth
Jorge Frank Elizabeth
Hunter Moreno Elizabeth
Esther Guzman Elizabeth
Dennis Stephens Elizabeth
Nettie Franklin Elizabeth
Stanley Gibson Elizabeth
Eugenia Greer Elizabeth
Jeffery Delgado Elizabeth
Clara Cross Elizabeth
Elizabeth Gordon Elizabeth
Eva Schmidt Elizabeth Schmidt
Samuel Schmidt Elizabeth Schmidt
Eva Schmidt Elizabeth
Samuel Schmidt Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth
last Elizabeth
Frank Elizabeth
Moreno Elizabeth
Guzman Elizabeth
Stephens Elizabeth
Franklin Elizabeth
Gibson Elizabeth
Greer Elizabeth
Delgado Elizabeth
Cross Elizabeth

View File

@ -11,48 +11,34 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE FILE HIVE PARTITIONING'"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=*/sample.parquet') WHERE column0 = _column0;
SELECT _number, _date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1;
SELECT _array, _float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT toTypeName(_array), toTypeName(_float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE _number = 42;
SELECT number, date FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') LIMIT 1;
SELECT array, float FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT toTypeName(array), toTypeName(float) FROM file('$CURDIR/data_hive/partitioning/array=[1,2,3]/float=42.42/sample.parquet') LIMIT 1;
SELECT count(*) FROM file('$CURDIR/data_hive/partitioning/number=42/date=2020-01-01/sample.parquet') WHERE number = 42;
"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT _identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2;
SELECT __identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2;
SELECT identifier FROM file('$CURDIR/data_hive/partitioning/identifier=*/email.csv') LIMIT 2;
SELECT a FROM file('$CURDIR/data_hive/partitioning/a=b/a=b/sample.parquet') LIMIT 1;
"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/column0=Elizabeth1/sample.parquet') LIMIT 10;
""" 2>&1 | grep -c "INCORRECT_DATA"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 0;
SELECT *, _column0 FROM file('$CURDIR/data_hive/partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, non_existing_column FROM file('$CURDIR/data_hive/partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
""" 2>&1 | grep -c "UNKNOWN_IDENTIFIER"
@ -62,23 +48,9 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE URL PARTITIONING'"
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM url('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;"""
SELECT *, non_existing_column FROM url('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;"""
$CLICKHOUSE_LOCAL -n -q """
set use_hive_partitioning = 0;
@ -93,24 +65,10 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3 PARTITIONING'"
$CLICKHOUSE_CLIENT -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Gordon/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = _column0;
SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
"""
$CLICKHOUSE_CLIENT -n -q """
@ -124,13 +82,7 @@ $CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'"
$CLICKHOUSE_CLIENT -n -q """
set use_hive_partitioning = 1;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = _column0;
SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column1 = _column1;
SELECT *, _column0, _column1 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, _column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/column1=Schmidt/sample.parquet') WHERE column0 = _column0 AND column1 = _column1;
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
"""