Merge branch 'master' into hdfs_config_prefix

2024-09-21 09:10:48 +00:00 · 2022-03-30 11:02:31 +08:00 · 2022-03-30 11:02:31 +08:00 · 9ffb42d507
commit 9ffb42d507
parent 631352ba4b a842a81aba
41 changed files with 294 additions and 95 deletions
--- a/contrib/llvm-cmake/CMakeLists.txt
+++ b/contrib/llvm-cmake/CMakeLists.txt
@ -1,9 +1,12 @@
-if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
-   set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
+# During cross-compilation in our CI, llvm-tblgen and the other build tools
+# have to be built for the host architecture and everything else for the target architecture (e.g. AArch64).
+# A possible workaround is to use llvm-tblgen from some package...
+# But let's just enable LLVM only for native builds.
+if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
+    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
 else()
-   set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
+    set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
 endif()
-
 option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})

 if (NOT ENABLE_EMBEDDED_COMPILER)
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@ -1378,7 +1378,7 @@ $REF_SHA	$SHA_TO_TEST	$(numactl --hardware | sed -n 's/^available:[[:space:]]\+/
 EOF

    # Also insert some data about the check into the CI checks table.
-    "${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \
+    "${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
        < ci-checks.tsv

    set -x
--- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
+++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md
@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree"
 这个引擎:

 -   允许快速写入不断变化的对象状态。
-   删除后台中的旧对象状态。 这显着降低了存储体积。
+-   删除后台中的旧对象状态。 这显著降低了存储体积。

 请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。

--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -184,6 +184,11 @@ void LocalServer::tryInitPath()
    if (path.back() != '/')
        path += '/';

+    fs::create_directories(fs::path(path) / "user_defined/");
+    fs::create_directories(fs::path(path) / "data/");
+    fs::create_directories(fs::path(path) / "metadata/");
+    fs::create_directories(fs::path(path) / "metadata_dropped/");
+
    global_context->setPath(path);

    global_context->setTemporaryStorage(path + "tmp");
@ -565,7 +570,6 @@ void LocalServer::processConfig()
        /// Lock path directory before read
        status.emplace(fs::path(path) / "status", StatusFile::write_full_info);

-        fs::create_directories(fs::path(path) / "user_defined/");
        LOG_DEBUG(log, "Loading user defined objects from {}", path);
        Poco::File(path + "user_defined/").createDirectories();
        UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
@ -573,9 +577,6 @@ void LocalServer::processConfig()
        LOG_DEBUG(log, "Loaded user defined objects.");

        LOG_DEBUG(log, "Loading metadata from {}", path);
-        fs::create_directories(fs::path(path) / "data/");
-        fs::create_directories(fs::path(path) / "metadata/");
-
        loadMetadataSystem(global_context);
        attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
        attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
--- a/src/Functions/flattenTuple.cpp
+++ b/src/Functions/flattenTuple.cpp
@ -0,0 +1,68 @@
+#include <Functions/IFunction.h>
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionHelpers.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/ObjectUtils.h>
+#include <Columns/ColumnTuple.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ILLEGAL_COLUMN;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
+/// SQL function `flattenTuple`: takes one argument that must be a named Tuple and
+/// returns a Tuple whose element names are the paths produced by the free function
+/// flattenTuple() for that type.
+/// Example from the accompanying test:
+///   Tuple(t1 Nested(a UInt32, s String), b UInt32, t2 Tuple(k String, v UInt32))
+///   -> Tuple(`t1.a` Array(UInt32), `t1.s` Array(String), b UInt32, `t2.k` String, `t2.v` UInt32)
+class FunctionFlattenTuple : public IFunction
+{
+public:
+    /// Name under which the function is registered and reported in error messages.
+    static constexpr auto name = "flattenTuple";
+    /// Factory hook; the context argument is not used.
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionFlattenTuple>(); }
+
+    String getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    /// Computes the result type for the single argument.
+    /// Throws ILLEGAL_TYPE_OF_ARGUMENT if the argument is not a Tuple or is a Tuple without explicit element names.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        const auto & type = arguments[0];
+        const auto * type_tuple = checkAndGetDataType<DataTypeTuple>(type.get());
+        if (!type_tuple || !type_tuple->haveExplicitNames())
+            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                "Argument for function '{}' must be Named Tuple. Got '{}'",
+                getName(), type->getName());
+
+        /// Calls the free function flattenTuple (not this class); it yields a list of paths and a matching list of types.
+        /// NOTE(review): presumably declared in DataTypes/ObjectUtils.h - confirm.
+        auto [paths, types] = flattenTuple(type);
+        Names names;
+        names.reserve(paths.size());
+        /// Each path is converted to its string form and becomes an element name of the resulting Tuple.
+        for (const auto & path : paths)
+            names.push_back(path.getPath());
+
+        return std::make_shared<DataTypeTuple>(types, names);
+    }
+
+    /// Flattens the argument column. The result type and the row count are not used here.
+    /// Throws ILLEGAL_COLUMN if the argument column is not a ColumnTuple.
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
+    {
+        auto column = arguments.at(0).column;
+        if (!checkAndGetColumn<ColumnTuple>(column.get()))
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
+                "Illegal column {} of first argument of function {}. Expected ColumnTuple",
+                column->getName(), getName());
+
+        /// Column overload of the free function flattenTuple.
+        return flattenTuple(column);
+    }
+};
+
+}
+
+/// Registers FunctionFlattenTuple in the given function factory.
+/// Called from registerFunctionsMiscellaneous().
+void registerFunctionFlattenTuple(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionFlattenTuple>();
+}
+
+}
--- a/src/Functions/registerFunctionsMiscellaneous.cpp
+++ b/src/Functions/registerFunctionsMiscellaneous.cpp
@ -80,6 +80,7 @@ void registerFunctionInitialQueryID(FunctionFactory & factory);
 void registerFunctionServerUUID(FunctionFactory &);
 void registerFunctionZooKeeperSessionUptime(FunctionFactory &);
 void registerFunctionGetOSKernelVersion(FunctionFactory &);
+void registerFunctionFlattenTuple(FunctionFactory &);

 #if USE_ICU
 void registerFunctionConvertCharset(FunctionFactory &);
@ -166,6 +167,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
    registerFunctionServerUUID(factory);
    registerFunctionZooKeeperSessionUptime(factory);
    registerFunctionGetOSKernelVersion(factory);
+    registerFunctionFlattenTuple(factory);

 #if USE_ICU
    registerFunctionConvertCharset(factory);
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@ -45,7 +45,8 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
 }

-static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & context)
+static MergeTreeReaderSettings getMergeTreeReaderSettings(
+    const ContextPtr & context, const SelectQueryInfo & query_info)
 {
    const auto & settings = context->getSettingsRef();
    return
@ -53,6 +54,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings(const ContextPtr & con
        .read_settings = context->getReadSettings(),
        .save_marks_in_cache = true,
        .checksum_on_read = settings.checksum_on_read,
+        .read_in_order = query_info.input_order_info != nullptr,
    };
 }

@ -82,7 +84,7 @@ ReadFromMergeTree::ReadFromMergeTree(
        getPrewhereInfo(query_info_),
        data_.getPartitionValueType(),
        virt_column_names_)})
-    , reader_settings(getMergeTreeReaderSettings(context_))
+    , reader_settings(getMergeTreeReaderSettings(context_, query_info_))
    , prepared_parts(std::move(parts_))
    , real_column_names(std::move(real_column_names_))
    , virt_column_names(std::move(virt_column_names_))
@ -206,6 +208,7 @@ ProcessorPtr ReadFromMergeTree::createSource(
            .colums_to_read = required_columns
        };
    }
+
    return std::make_shared<TSource>(
            data, storage_snapshot, part.data_part, max_block_size, preferred_block_size_bytes,
            preferred_max_column_in_block_size_bytes, required_columns, part.ranges, use_uncompressed_cache, prewhere_info,
@ -921,7 +924,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
            total_marks_pk += part->index_granularity.getMarksCountWithoutFinal();
        parts_before_pk = parts.size();

-        auto reader_settings = getMergeTreeReaderSettings(context);
+        auto reader_settings = getMergeTreeReaderSettings(context, query_info);

        bool use_skip_indexes = settings.use_skip_indexes;
        if (select.final() && !settings.use_skip_indexes_if_final)
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@ -575,9 +575,10 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
    return checksum->second.file_size;
 }

-String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const
+String IMergeTreeDataPart::getColumnNameWithMinimumCompressedSize(const StorageSnapshotPtr & storage_snapshot) const
 {
-    const auto & storage_columns = metadata_snapshot->getColumns().getAllPhysical();
+    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects().withSubcolumns();
+    auto storage_columns = storage_snapshot->getColumns(options);
    MergeTreeData::AlterConversions alter_conversions;
    if (!parent_part)
        alter_conversions = storage.getAlterConversionsForPart(shared_from_this());
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@ -168,7 +168,7 @@ public:

    /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()).
    /// If no checksums are present returns the name of the first physically existing column.
-    String getColumnNameWithMinimumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const;
+    String getColumnNameWithMinimumCompressedSize(const StorageSnapshotPtr & storage_snapshot) const;

    bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); }

--- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
+++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
@ -24,7 +24,7 @@ namespace
 /// least one existing (physical) column in part.
 bool injectRequiredColumnsRecursively(
    const String & column_name,
-    const ColumnsDescription & storage_columns,
+    const StorageSnapshotPtr & storage_snapshot,
    const MergeTreeData::AlterConversions & alter_conversions,
    const MergeTreeData::DataPartPtr & part,
    Names & columns,
@ -36,7 +36,8 @@ bool injectRequiredColumnsRecursively(
    /// stages.
    checkStackSize();

-    auto column_in_storage = storage_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::AllPhysical, column_name);
+    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
+    auto column_in_storage = storage_snapshot->tryGetColumn(options, column_name);
    if (column_in_storage)
    {
        auto column_name_in_part = column_in_storage->getNameInStorage();
@ -63,7 +64,8 @@ bool injectRequiredColumnsRecursively(

    /// Column doesn't have default value and don't exist in part
    /// don't need to add to required set.
-    const auto column_default = storage_columns.getDefault(column_name);
+    auto metadata_snapshot = storage_snapshot->getMetadataForQuery();
+    const auto column_default = metadata_snapshot->getColumns().getDefault(column_name);
    if (!column_default)
        return false;

@ -73,39 +75,36 @@ bool injectRequiredColumnsRecursively(

    bool result = false;
    for (const auto & identifier : identifiers)
-        result |= injectRequiredColumnsRecursively(identifier, storage_columns, alter_conversions, part, columns, required_columns, injected_columns);
+        result |= injectRequiredColumnsRecursively(identifier, storage_snapshot, alter_conversions, part, columns, required_columns, injected_columns);

    return result;
 }

 }

-NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns)
+NameSet injectRequiredColumns(
+    const MergeTreeData & storage,
+    const StorageSnapshotPtr & storage_snapshot,
+    const MergeTreeData::DataPartPtr & part,
+    Names & columns)
 {
    NameSet required_columns{std::begin(columns), std::end(columns)};
    NameSet injected_columns;

    bool have_at_least_one_physical_column = false;
-
-    const auto & storage_columns = metadata_snapshot->getColumns();
    MergeTreeData::AlterConversions alter_conversions;
    if (!part->isProjectionPart())
        alter_conversions = storage.getAlterConversionsForPart(part);
+
    for (size_t i = 0; i < columns.size(); ++i)
    {
-        auto name_in_storage = Nested::extractTableName(columns[i]);
-        if (storage_columns.has(name_in_storage) && isObject(storage_columns.get(name_in_storage).type))
-        {
-            have_at_least_one_physical_column = true;
-            continue;
-        }
-
        /// We are going to fetch only physical columns
-        if (!storage_columns.hasColumnOrSubcolumn(GetColumnsOptions::AllPhysical, columns[i]))
-            throw Exception("There is no physical column or subcolumn " + columns[i] + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE);
+        auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns().withExtendedObjects();
+        if (!storage_snapshot->tryGetColumn(options, columns[i]))
+            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]);

        have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
-            columns[i], storage_columns, alter_conversions,
+            columns[i], storage_snapshot, alter_conversions,
            part, columns, required_columns, injected_columns);
    }

@ -115,7 +114,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada
        */
    if (!have_at_least_one_physical_column)
    {
-        const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(metadata_snapshot);
+        const auto minimum_size_column_name = part->getColumnNameWithMinimumCompressedSize(storage_snapshot);
        columns.push_back(minimum_size_column_name);
        /// correctly report added column
        injected_columns.insert(columns.back());
@ -271,7 +270,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
    Names pre_column_names;

    /// inject columns required for defaults evaluation
-    bool should_reorder = !injectRequiredColumns(storage, storage_snapshot->getMetadataForQuery(), data_part, column_names).empty();
+    bool should_reorder = !injectRequiredColumns(storage, storage_snapshot, data_part, column_names).empty();

    if (prewhere_info)
    {
@ -296,7 +295,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(
        if (pre_column_names.empty())
            pre_column_names.push_back(column_names[0]);

-        const auto injected_pre_columns = injectRequiredColumns(storage, storage_snapshot->getMetadataForQuery(), data_part, pre_column_names);
+        const auto injected_pre_columns = injectRequiredColumns(storage, storage_snapshot, data_part, pre_column_names);
        if (!injected_pre_columns.empty())
            should_reorder = true;

--- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h
+++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h
@ -22,7 +22,7 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr<MergeTreeBlockSizePredict
  * so that you can calculate the DEFAULT expression for these columns.
  * Adds them to the `columns`.
  */
-NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns);
+NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageSnapshotPtr & storage_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns);


 /// A batch of work for MergeTreeThreadSelectBlockInputStream
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@ -877,12 +877,22 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
    {
        std::atomic<size_t> total_rows{0};

+        /// Do not check the number of rows to read if we are reading
+        /// in order of the sorting key with a limit.
+        /// In the general case, when there is a WHERE clause,
+        /// it's impossible to estimate the number of rows precisely,
+        /// because we can stop reading at any time.
+
        SizeLimits limits;
-        if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
+        if (settings.read_overflow_mode == OverflowMode::THROW
+            && settings.max_rows_to_read
+            && !query_info.input_order_info)
            limits = SizeLimits(settings.max_rows_to_read, 0, settings.read_overflow_mode);

        SizeLimits leaf_limits;
-        if (settings.read_overflow_mode_leaf == OverflowMode::THROW && settings.max_rows_to_read_leaf)
+        if (settings.read_overflow_mode_leaf == OverflowMode::THROW
+            && settings.max_rows_to_read_leaf
+            && !query_info.input_order_info)
            leaf_limits = SizeLimits(settings.max_rows_to_read_leaf, 0, settings.read_overflow_mode_leaf);

        auto mark_cache = context->getIndexMarkCache();
--- a/src/Storages/MergeTree/MergeTreeIOSettings.h
+++ b/src/Storages/MergeTree/MergeTreeIOSettings.h
@ -20,6 +20,8 @@ struct MergeTreeReaderSettings
    bool save_marks_in_cache = false;
    /// Validate checksums on reading (should be always enabled in production).
    bool checksum_on_read = true;
+    /// True if we read in order of sorting key.
+    bool read_in_order = false;
 };

 struct MergeTreeWriterSettings
--- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
+++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp
@ -39,9 +39,12 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
 {
    /// Actually it means that parallel reading from replicas enabled
    /// and we have to collaborate with initiator.
-    /// In this case we won't set approximate rows, because it will be accounted multiple times
-    if (!extension_.has_value())
+    /// In this case we won't set approximate rows, because it will be accounted multiple times.
+    /// Also do not count amount of read rows if we read in order of sorting key,
+    /// because we don't know actual amount of read rows in case when limit is set.
+    if (!extension_.has_value() && !reader_settings.read_in_order)
        addTotalRowsApprox(total_rows);
+
    ordered_names = header_without_virtual_columns.getNames();
 }

--- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
+++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp
@ -41,7 +41,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
    addTotalRowsApprox(data_part->rows_count);

    /// Add columns because we don't want to read empty blocks
-    injectRequiredColumns(storage, storage_snapshot->metadata, data_part, columns_to_read);
+    injectRequiredColumns(storage, storage_snapshot, data_part, columns_to_read);
    NamesAndTypesList columns_for_reader;
    if (take_column_types_from_storage)
    {
--- a/src/Storages/StorageSnapshot.cpp
+++ b/src/Storages/StorageSnapshot.cpp
@ -51,40 +51,42 @@ NamesAndTypesList StorageSnapshot::getColumns(const GetColumnsOptions & options)
 NamesAndTypesList StorageSnapshot::getColumnsByNames(const GetColumnsOptions & options, const Names & names) const
 {
    NamesAndTypesList res;
-    const auto & columns = getMetadataForQuery()->getColumns();
    for (const auto & name : names)
+        res.push_back(getColumn(options, name));
+    return res;
+}
+
+std::optional<NameAndTypePair> StorageSnapshot::tryGetColumn(const GetColumnsOptions & options, const String & column_name) const
+{
+    const auto & columns = getMetadataForQuery()->getColumns();
+    auto column = columns.tryGetColumn(options, column_name);
+    if (column && (!isObject(column->type) || !options.with_extended_objects))
+        return column;
+
+    if (options.with_extended_objects)
    {
-        auto column = columns.tryGetColumn(options, name);
-        if (column && !isObject(column->type))
-        {
-            res.emplace_back(std::move(*column));
-            continue;
-        }
-
-        if (options.with_extended_objects)
-        {
-            auto object_column = object_columns.tryGetColumn(options, name);
-            if (object_column)
-            {
-                res.emplace_back(std::move(*object_column));
-                continue;
-            }
-        }
-
-        if (options.with_virtuals)
-        {
-            auto it = virtual_columns.find(name);
-            if (it != virtual_columns.end())
-            {
-                res.emplace_back(name, it->second);
-                continue;
-            }
-        }
-
-        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", name);
+        auto object_column = object_columns.tryGetColumn(options, column_name);
+        if (object_column)
+            return object_column;
    }

-    return res;
+    if (options.with_virtuals)
+    {
+        auto it = virtual_columns.find(column_name);
+        if (it != virtual_columns.end())
+            return NameAndTypePair(column_name, it->second);
+    }
+
+    return {};
+}
+
+/// Throwing counterpart of tryGetColumn(): returns the column found for `column_name`
+/// under the given options, or throws NO_SUCH_COLUMN_IN_TABLE if tryGetColumn() returns nothing.
+NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, const String & column_name) const
+{
+    auto column = tryGetColumn(options, column_name);
+    if (!column)
+        throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", column_name);
+
+    return *column;
 }

 Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const
--- a/src/Storages/StorageSnapshot.h
+++ b/src/Storages/StorageSnapshot.h
@ -61,6 +61,10 @@ struct StorageSnapshot
    /// Get columns with types according to options only for requested names.
    NamesAndTypesList getColumnsByNames(const GetColumnsOptions & options, const Names & names) const;

+    /// Get column with type according to options for requested name.
+    std::optional<NameAndTypePair> tryGetColumn(const GetColumnsOptions & options, const String & column_name) const;
+    NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const;
+
    /// Block with ordinary + materialized + aliases + virtuals + subcolumns.
    Block getSampleBlockForColumns(const Names & column_names) const;

--- a/tests/ci/clickhouse_helper.py
+++ b/tests/ci/clickhouse_helper.py
@ -10,13 +10,13 @@ from get_robot_token import get_parameter_from_ssm
 class ClickHouseHelper:
    def __init__(self, url=None):
        if url is None:
-            self.url = get_parameter_from_ssm("clickhouse-test-stat-url2")
-            self.auth = {
-                "X-ClickHouse-User": get_parameter_from_ssm(
-                    "clickhouse-test-stat-login2"
-                ),
-                "X-ClickHouse-Key": "",
-            }
+            url = get_parameter_from_ssm("clickhouse-test-stat-url")
+
+        self.url = url
+        self.auth = {
+            "X-ClickHouse-User": get_parameter_from_ssm("clickhouse-test-stat-login"),
+            "X-ClickHouse-Key": get_parameter_from_ssm("clickhouse-test-stat-password")
+        }

    @staticmethod
    def _insert_json_str_info_impl(url, auth, db, table, json_str):
@ -179,7 +179,7 @@ def mark_flaky_tests(clickhouse_helper, check_name, test_results):
            check_name=check_name
        )

-        tests_data = clickhouse_helper.select_json_each_row("gh-data", query)
+        tests_data = clickhouse_helper.select_json_each_row("default", query)
        master_failed_tests = {row["test_name"] for row in tests_data}
        logging.info("Found flaky tests: %s", ", ".join(master_failed_tests))

--- a/tests/ci/compatibility_check.py
+++ b/tests/ci/compatibility_check.py
@ -197,4 +197,4 @@ if __name__ == "__main__":
        report_url,
        CHECK_NAME,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/ci/docker_images_check.py
+++ b/tests/ci/docker_images_check.py
@ -459,7 +459,7 @@ def main():
        NAME,
    )
    ch_helper = ClickHouseHelper()
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)


 if __name__ == "__main__":
--- a/tests/ci/docker_manifests_merge.py
+++ b/tests/ci/docker_manifests_merge.py
@ -234,7 +234,7 @@ def main():
        NAME,
    )
    ch_helper = ClickHouseHelper()
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)


 if __name__ == "__main__":
--- a/tests/ci/docs_check.py
+++ b/tests/ci/docs_check.py
@ -114,4 +114,4 @@ if __name__ == "__main__":
        report_url,
        NAME,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/ci/fast_test_check.py
+++ b/tests/ci/fast_test_check.py
@ -204,7 +204,7 @@ if __name__ == "__main__":
        report_url,
        NAME,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    # Refuse other checks to run if fast test failed
    if state != "success":
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@ -356,7 +356,7 @@ if __name__ == "__main__":
        report_url,
        check_name_with_group,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)

    if state != "success":
        if "force-tests" in pr_info.labels:
--- a/tests/ci/integration_test_check.py
+++ b/tests/ci/integration_test_check.py
@ -279,4 +279,4 @@ if __name__ == "__main__":
        report_url,
        check_name_with_group,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/ci/keeper_jepsen_check.py
+++ b/tests/ci/keeper_jepsen_check.py
@ -271,5 +271,5 @@ if __name__ == "__main__":
        report_url,
        CHECK_NAME,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
    clear_autoscaling_group()
--- a/tests/ci/split_build_smoke_check.py
+++ b/tests/ci/split_build_smoke_check.py
@ -147,4 +147,4 @@ if __name__ == "__main__":
        report_url,
        CHECK_NAME,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/ci/stress_check.py
+++ b/tests/ci/stress_check.py
@ -176,4 +176,4 @@ if __name__ == "__main__":
        report_url,
        check_name,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/ci/style_check.py
+++ b/tests/ci/style_check.py
@ -117,4 +117,4 @@ if __name__ == "__main__":
        report_url,
        NAME,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/ci/unit_tests_check.py
+++ b/tests/ci/unit_tests_check.py
@ -173,4 +173,4 @@ if __name__ == "__main__":
        report_url,
        check_name,
    )
-    ch_helper.insert_events_into(db="gh-data", table="checks", events=prepared_events)
+    ch_helper.insert_events_into(db="default", table="checks", events=prepared_events)
--- a/tests/integration/test_allowed_url_from_config/test.py
+++ b/tests/integration/test_allowed_url_from_config/test.py
@ -280,4 +280,4 @@ def test_HDFS(start_cluster):

 def test_schema_inference(start_cluster):
    error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')")
-    assert(error.find('ReadWriteBufferFromHTTPBase') == -1)
+    assert error.find("ReadWriteBufferFromHTTPBase") == -1
--- a/tests/integration/test_distributed_respect_user_timeouts/test.py
+++ b/tests/integration/test_distributed_respect_user_timeouts/test.py
@ -94,7 +94,7 @@ def _check_exception(exception, expected_tries=3):

@pytest.fixture(scope="module", params=["configs", "configs_secure"])
 def started_cluster(request):
-    cluster = ClickHouseCluster(__file__)
+    cluster = ClickHouseCluster(__file__, request.param)
    cluster.__with_ssl_config = request.param == "configs_secure"
    main_configs = []
    main_configs += [os.path.join(request.param, "config.d/remote_servers.xml")]
--- a/tests/queries/0_stateless/01825_type_json_missed_values.reference
+++ b/tests/queries/0_stateless/01825_type_json_missed_values.reference
@ -0,0 +1,2 @@
+Tuple(foo Int8, k1 Int8, k2 Int8)
+1
--- a/tests/queries/0_stateless/01825_type_json_missed_values.sql
+++ b/tests/queries/0_stateless/01825_type_json_missed_values.sql
@ -0,0 +1,19 @@
+-- Tags: no-fasttest
+
+-- Reads a JSON subcolumn (obj.foo) that is present in only one of the inserted rows.
+-- Expected output: the common type Tuple(foo Int8, k1 Int8, k2 Int8) and a count of 1.
+
+DROP TABLE IF EXISTS t_json;
+
+SET allow_experimental_object_type = 1;
+
+CREATE TABLE t_json(id UInt64, obj JSON)
+ENGINE = MergeTree ORDER BY id
+SETTINGS min_bytes_for_wide_part = 0;
+
+-- Merges are stopped before inserting.
+-- NOTE(review): presumably so that the data of the two INSERTs stays in separate parts - confirm.
+SYSTEM STOP MERGES t_json;
+
+-- One million rows with keys k1/k2 only, then a single row with key foo only.
+INSERT INTO t_json SELECT number, '{"k1": 1, "k2": 2}' FROM numbers(1000000);
+INSERT INTO t_json VALUES (1000001, '{"foo": 1}');
+
+SELECT toTypeName(obj) FROM t_json LIMIT 1;
+-- Only the last inserted row has a non-zero foo.
+SELECT count() FROM t_json WHERE obj.foo != 0;
+
+DROP TABLE IF EXISTS t_json;
--- a/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.reference
+++ b/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.reference
@ -0,0 +1,6 @@
+10
+0
+1
+2
+3
+4
--- a/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.sql
+++ b/tests/queries/0_stateless/02155_read_in_order_max_rows_to_read.sql
@ -0,0 +1,22 @@
+-- Checks how max_rows_to_read is enforced for plain reads and for reads in order of the sorting key.
+DROP TABLE IF EXISTS t_max_rows_to_read;
+
+-- Sorting key is `a`; index_granularity = 4 makes every granule hold 4 rows.
+CREATE TABLE t_max_rows_to_read (a UInt64)
+ENGINE = MergeTree ORDER BY a
+SETTINGS index_granularity = 4;
+
+INSERT INTO t_max_rows_to_read SELECT number FROM numbers(100);
+
+SET max_threads = 1;
+
+-- Expected to succeed and return the single row 10.
+SELECT a FROM t_max_rows_to_read WHERE a = 10 SETTINGS max_rows_to_read = 4;
+
+-- Read in order with a small LIMIT: expected to succeed and return 0..4 although the table has 100 rows.
+SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 12;
+
+-- This should work, but actually it doesn't. Need to investigate.
+-- SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 SETTINGS max_rows_to_read = 20;
+
+-- The queries below are expected to fail with server error 158.
+SELECT a FROM t_max_rows_to_read ORDER BY a LIMIT 20 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
+SELECT a FROM t_max_rows_to_read WHERE a > 10 ORDER BY a LIMIT 5 FORMAT Null SETTINGS max_rows_to_read = 12; -- { serverError 158 }
+SELECT a FROM t_max_rows_to_read WHERE a = 10 OR a = 20 FORMAT Null SETTINGS max_rows_to_read = 4; -- { serverError 158 }
+
+DROP TABLE t_max_rows_to_read;
--- a/tests/queries/0_stateless/02246_async_insert_quota.sh
+++ b/tests/queries/0_stateless/02246_async_insert_quota.sh
@ -16,7 +16,7 @@ ${CLICKHOUSE_CLIENT} -q "CREATE ROLE r02246"
 ${CLICKHOUSE_CLIENT} -q "CREATE USER u02246"
 ${CLICKHOUSE_CLIENT} -q "GRANT INSERT ON async_inserts_02246 TO r02246"
 ${CLICKHOUSE_CLIENT} -q "GRANT r02246 to u02246"
-${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 1 HOUR MAX QUERY INSERTS = 2 TO r02246"
+${CLICKHOUSE_CLIENT} -q "CREATE QUOTA q02246 FOR INTERVAL 100 YEAR MAX QUERY INSERTS = 2 TO r02246"

 ${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (1, 'a')"
 ${CLICKHOUSE_CLIENT} --user u02246 --async_insert 1 -q "INSERT INTO async_inserts_02246 VALUES (2, 'b')"
--- a/tests/queries/0_stateless/02246_clickhouse_local_drop_database.reference
+++ b/tests/queries/0_stateless/02246_clickhouse_local_drop_database.reference
--- a/tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh
+++ b/tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh
@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# Checks that clickhouse-local can create a database with a MergeTree table and then
+# drop that database, both with an explicit --path directory and without --path.
+# The expected output (reference file) is empty.
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+# Scratch directory used as --path for the first invocation; start from a clean state.
+dir=${CLICKHOUSE_TEST_UNIQUE_NAME}
+[[ -d "$dir" ]] && rm -r "$dir"
+mkdir "$dir"
+
+# Case 1: explicit --path.
+$CLICKHOUSE_LOCAL --multiline --multiquery --path "$dir" -q """
+DROP DATABASE IF EXISTS test;
+CREATE DATABASE IF NOT EXISTS test;
+USE test;
+CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id;
+DROP DATABASE test;
+"""
+
+# Remove the scratch directory so the test leaves nothing behind.
+# Done here rather than at the end so the script's exit status is still that of the last clickhouse-local call.
+rm -r "$dir"
+
+# Case 2: no --path given.
+$CLICKHOUSE_LOCAL --multiline --multiquery -q """
+DROP DATABASE IF EXISTS test;
+CREATE DATABASE IF NOT EXISTS test;
+USE test;
+CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id;
+DROP DATABASE test;
+"""
--- a/tests/queries/0_stateless/02246_flatten_tuple.reference
+++ b/tests/queries/0_stateless/02246_flatten_tuple.reference
@ -0,0 +1,4 @@
+([1,2],['a','b'],3,'c',4)	Tuple(`t1.a` Array(UInt32), `t1.s` Array(String), b UInt32, `t2.k` String, `t2.v` UInt32)
+Tuple(id Int8, obj Tuple(k1 Int8, k2 Tuple(k3 String, k4 Nested(k5 Int8, k6 Int8)), some Int8), s String)	Tuple(id Int8, `obj.k1` Int8, `obj.k2.k3` String, `obj.k2.k4.k5` Array(Int8), `obj.k2.k4.k6` Array(Int8), `obj.some` Int8, s String)
+1	1	2	[3,4]	[0,0]	0	foo
+2	0	str	[0]	[55]	42	bar
--- a/tests/queries/0_stateless/02246_flatten_tuple.sql
+++ b/tests/queries/0_stateless/02246_flatten_tuple.sql
@ -0,0 +1,24 @@
+-- Tags: no-fasttest
+
+-- Tests flattenTuple() on an explicitly declared named Tuple and on a JSON (Object) column.
+
+DROP TABLE IF EXISTS t_flatten_tuple;
+DROP TABLE IF EXISTS t_flatten_object;
+
+SET flatten_nested = 0;
+
+-- Named Tuple containing a Nested element, a scalar and an inner named Tuple.
+CREATE TABLE t_flatten_tuple(t Tuple(t1 Nested(a UInt32, s String), b UInt32, t2 Tuple(k String, v UInt32))) ENGINE = Memory;
+
+INSERT INTO t_flatten_tuple VALUES (([(1, 'a'), (2, 'b')], 3, ('c', 4)));
+
+-- Expected type: Tuple(`t1.a` Array(UInt32), `t1.s` Array(String), b UInt32, `t2.k` String, `t2.v` UInt32).
+SELECT flattenTuple(t) AS ft, toTypeName(ft) FROM t_flatten_tuple;
+
+SET allow_experimental_object_type = 1;
+CREATE TABLE t_flatten_object(data JSON) ENGINE = Memory;
+
+-- Two documents with partially different keys (k5 vs k6, `some` present only in the second one).
+INSERT INTO t_flatten_object VALUES ('{"id": 1, "obj": {"k1": 1, "k2": {"k3": 2, "k4": [{"k5": 3}, {"k5": 4}]}}, "s": "foo"}');
+INSERT INTO t_flatten_object VALUES ('{"id": 2, "obj": {"k2": {"k3": "str", "k4": [{"k6": 55}]}, "some": 42}, "s": "bar"}');
+
+-- First query prints the original and the flattened type; second prints the flattened values row by row.
+SELECT toTypeName(data), toTypeName(flattenTuple(data)) FROM t_flatten_object LIMIT 1;
+SELECT untuple(flattenTuple(data)) FROM t_flatten_object ORDER BY data.id;
+
+DROP TABLE IF EXISTS t_flatten_tuple;
+DROP TABLE IF EXISTS t_flatten_object;