From 237de4e775ba4080087496303c350cddbebc9043 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 14 Jun 2024 12:31:52 +0000 Subject: [PATCH 01/64] add feature --- .../system-tables/detached_tables.md | 36 + src/Databases/DatabaseAtomic.cpp | 20 +- src/Databases/DatabaseAtomic.h | 7 +- src/Databases/DatabaseOnDisk.cpp | 3 + src/Databases/DatabaseOrdinary.cpp | 37 +- src/Databases/DatabaseOrdinary.h | 3 + src/Databases/DatabasesCommon.cpp | 54 +- src/Databases/DatabasesCommon.h | 6 +- src/Databases/IDatabase.h | 63 +- src/Storages/IStorage_fwd.h | 4 +- src/Storages/System/ReadFromSystemTables.cpp | 138 ++++ src/Storages/System/ReadFromSystemTables.h | 47 ++ .../System/StorageSystemDetachedTables.cpp | 56 ++ .../System/StorageSystemDetachedTables.h | 32 + src/Storages/System/StorageSystemTables.cpp | 761 ++---------------- src/Storages/System/TablesBlockSource.cpp | 569 +++++++++++++ src/Storages/System/TablesBlockSource.h | 96 +++ src/Storages/System/attachSystemTables.cpp | 2 + .../test_system_detached_tables/__init__.py | 0 .../test_system_detached_tables/test.py | 38 + .../03172_system_detached_tables.reference | 5 + .../03172_system_detached_tables.sql | 28 + 22 files changed, 1289 insertions(+), 716 deletions(-) create mode 100644 docs/en/operations/system-tables/detached_tables.md create mode 100644 src/Storages/System/ReadFromSystemTables.cpp create mode 100644 src/Storages/System/ReadFromSystemTables.h create mode 100644 src/Storages/System/StorageSystemDetachedTables.cpp create mode 100644 src/Storages/System/StorageSystemDetachedTables.h create mode 100644 src/Storages/System/TablesBlockSource.cpp create mode 100644 src/Storages/System/TablesBlockSource.h create mode 100644 tests/integration/test_system_detached_tables/__init__.py create mode 100644 tests/integration/test_system_detached_tables/test.py create mode 100644 tests/queries/0_stateless/03172_system_detached_tables.reference create mode 100644 tests/queries/0_stateless/03172_system_detached_tables.sql diff --git a/docs/en/operations/system-tables/detached_tables.md b/docs/en/operations/system-tables/detached_tables.md new file mode 100644 index 00000000000..d86e58d2899 --- /dev/null +++ b/docs/en/operations/system-tables/detached_tables.md @@ -0,0 +1,36 @@ +--- +slug: /en/operations/system-tables/detached_tables +--- +# detached_tables + +Contains information of each detached table. + +Columns: + +- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. + +- `name` ([String](../../sql-reference/data-types/string.md)) — Table name. + +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). + +- `is_permanently` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag indicates that the table was detached PERMANENTLY. + +- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. + + +**Example** + +```sql +SELECT * FROM system.detached_tables FORMAT Vertical; +``` + +```text +Row 1: +────── +database: base +name: t1 +uuid: 81b1c20a-b7c6-4116-a2ce-7583fb6b6736 +data_paths: ['/var/lib/clickhouse/store/81b/81b1c20a-b7c6-4116-a2ce-7583fb6b6736/'] +metadata_path: /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql +is_permanently: 1 +``` diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ccab72cfbae..ddc30627cde 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -37,8 +37,10 @@ namespace ErrorCodes class AtomicDatabaseTablesSnapshotIterator final : public DatabaseTablesSnapshotIterator { public: - explicit AtomicDatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && base) - : DatabaseTablesSnapshotIterator(std::move(base)) {} + explicit AtomicDatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && base) noexcept + : DatabaseTablesSnapshotIterator(std::move(base)) + { + } UUID uuid() const override { return table()->getStorageID().uuid; } }; @@ -101,6 +103,8 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, auto table_id = table->getStorageID(); assertDetachedTableNotInUse(table_id.uuid); DatabaseOrdinary::attachTableUnlocked(name, table); + detached_tables.erase(table_id.uuid); + table_name_to_path.emplace(std::make_pair(name, relative_table_path)); } @@ -108,11 +112,11 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & { DetachedTables not_in_use; std::lock_guard lock(mutex); - auto table = DatabaseOrdinary::detachTableUnlocked(name); + auto detached_table = DatabaseOrdinary::detachTableUnlocked(name); table_name_to_path.erase(name); - detached_tables.emplace(table->getStorageID().uuid, table); + detached_tables.emplace(detached_table->getStorageID().uuid, detached_table); not_in_use = cleanupDetachedTables(); - return table; + return detached_table; } void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool sync) @@ -433,6 +437,12 @@ DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::Fil return std::make_unique(std::move(typeid_cast(*base_iter))); } +DatabaseDetachedTablesSnapshotIteratorPtr DatabaseAtomic::getDetachedTablesIterator( + ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name, const bool skip_not_loaded) const +{ + return DatabaseOrdinary::getDetachedTablesIterator(local_context, filter_by_table_name, skip_not_loaded); +} + UUID DatabaseAtomic::tryGetTableUUID(const String & table_name) const { if (auto table = tryGetTable(table_name, getContext())) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index b59edd479ba..47afc4dc5ef 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -1,7 +1,8 @@ #pragma once -#include #include +#include +#include namespace DB @@ -48,6 +49,9 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + DatabaseDetachedTablesSnapshotIteratorPtr + getDetachedTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override; @@ -81,6 +85,7 @@ protected: //TODO store path in DatabaseWithOwnTables::tables using NameToPathMap = std::unordered_map; + NameToPathMap table_name_to_path TSA_GUARDED_BY(mutex); DetachedTables detached_tables TSA_GUARDED_BY(mutex); diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index b8154372116..25e5347e017 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -307,6 +307,9 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri try { FS::createFile(detached_permanently_flag); + + std::lock_guard lock(mutex); + snapshot_detached_tables.at(table_name).is_permanently = true; } catch (Exception & e) { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 10a8e06e8f0..046cbfa40be 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -187,7 +187,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables size_t prev_tables_count = metadata.parsed_tables.size(); size_t prev_total_dictionaries = metadata.total_dictionaries; - auto process_metadata = [&metadata, is_startup, this](const String & file_name) + auto process_metadata = [&metadata, is_startup, local_context, this](const String & file_name) mutable { fs::path path(getMetadataPath()); fs::path file_path(file_name); @@ -195,7 +195,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables try { - auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); + auto ast = parseQueryFromMetadata(log, local_context, full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { FunctionNameNormalizer::visit(ast.get()); @@ -226,6 +226,33 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); permanently_detached_tables.push_back(table_name); LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name)); + + // @TODO refactoring + auto parsed_table_metadata = ParsedTableMetadata{full_path.string(), ast}; + const auto & query = parsed_table_metadata.ast->as(); + + std::lock_guard lock(mutex); + + auto [detached_table_name, table] = createTableFromAST( + query, + database_name, + getTableDataPath(query), + std::const_pointer_cast(local_context), + LoadingStrictnessLevel::CREATE); + + const auto storage_id = table->getStorageID(); + + SnapshotDetachedTable snapshot_detached_table; + snapshot_detached_table.detabase = storage_id.getDatabaseName(); + snapshot_detached_table.table = detached_table_name; + snapshot_detached_table.uuid = storage_id.uuid; + snapshot_detached_table.is_permanently = true; + snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); + + + snapshot_detached_tables.emplace(detached_table_name, std::move(snapshot_detached_table)); + + LOG_TRACE(log, "Add detached table {} to system.detached_tables", detached_table_name); return; } @@ -487,6 +514,12 @@ DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_c return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); } +DatabaseDetachedTablesSnapshotIteratorPtr DatabaseOrdinary::getDetachedTablesIterator( + ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const +{ + return DatabaseWithOwnTablesBase::getDetachedTablesIterator(local_context, filter_by_table_name, skip_not_loaded); +} + Strings DatabaseOrdinary::getAllTableNames(ContextPtr) const { std::set unique_names; diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index ef00ac8fdfa..d2891147922 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -57,6 +57,9 @@ public: LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator( + ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + Strings getAllTableNames(ContextPtr context) const override; void alterTable( diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fd38a31da5c..312c0a4477f 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -2,12 +2,9 @@ #include #include -#include -#include -#include -#include #include #include +#include #include #include #include @@ -16,6 +13,10 @@ #include #include #include +#include +#include +#include +#include namespace DB @@ -237,6 +238,35 @@ DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPt return std::make_unique(std::move(filtered_tables), database_name); } +DatabaseDetachedTablesSnapshotIteratorPtr DatabaseWithOwnTablesBase::getDetachedTablesIterator( + ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const +{ + std::lock_guard lock(mutex); + if (!filter_by_table_name) + return std::make_unique(snapshot_detached_tables); + + SnapshotDetachedTables filtered_tables; + for (const auto & [table_name, storage] : tables) + if (filter_by_table_name(table_name)) + { + SnapshotDetachedTable snapshot_detached_table; + snapshot_detached_table.detabase = storage->getStorageID().getDatabaseName(); + snapshot_detached_table.table = table_name; + if (storage->getStorageID().hasUUID()) + { + snapshot_detached_table.uuid = storage->getStorageID().uuid; + } + + snapshot_detached_table.is_permanently = false; + snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); + + filtered_tables.emplace(table_name, std::move(snapshot_detached_table)); + } + + + return std::make_unique(std::move(filtered_tables)); +} + bool DatabaseWithOwnTablesBase::empty() const { std::lock_guard lock(mutex); @@ -258,6 +288,19 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(database_name), backQuote(table_name)); res = it->second; + + SnapshotDetachedTable snapshot_detached_table; + snapshot_detached_table.detabase = it->second->getStorageID().getDatabaseName(); + snapshot_detached_table.table = it->first; + if (it->second->getStorageID().hasUUID()) + { + snapshot_detached_table.uuid = it->second->getStorageID().uuid; + } + snapshot_detached_table.is_permanently = false; + snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); + + snapshot_detached_tables.emplace(it->first, std::move(snapshot_detached_table)); + tables.erase(it); res->is_detached = true; CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); @@ -298,6 +341,8 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {} already exists.", table_id.getFullTableName()); } + snapshot_detached_tables.erase(table_name); + /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. table->is_detached = false; @@ -333,6 +378,7 @@ void DatabaseWithOwnTablesBase::shutdown() std::lock_guard lock(mutex); tables.clear(); + snapshot_detached_tables.clear(); } DatabaseWithOwnTablesBase::~DatabaseWithOwnTablesBase() diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 2eecf8a564f..1ca49e90c23 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -37,6 +37,9 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + DatabaseDetachedTablesSnapshotIteratorPtr + getDetachedTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; @@ -46,12 +49,13 @@ public: protected: Tables tables TSA_GUARDED_BY(mutex); + SnapshotDetachedTables snapshot_detached_tables TSA_GUARDED_BY(mutex); LoggerPtr log; DatabaseWithOwnTablesBase(const String & name_, const String & logger, ContextPtr context); void attachTableUnlocked(const String & table_name, const StoragePtr & table) TSA_REQUIRES(mutex); - StoragePtr detachTableUnlocked(const String & table_name) TSA_REQUIRES(mutex); + StoragePtr detachTableUnlocked(const String & table_name) TSA_REQUIRES(mutex); StoragePtr getTableUnlocked(const String & table_name) const TSA_REQUIRES(mutex); StoragePtr tryGetTableNoWait(const String & table_name) const; }; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index b00f2fe4baf..7c6b3b3004c 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -5,20 +5,22 @@ #include #include #include +#include +#include #include #include -#include #include +#include #include #include -#include #include #include +#include #include #include +#include #include -#include namespace DB @@ -110,6 +112,55 @@ public: using DatabaseTablesIteratorPtr = std::unique_ptr; +struct SnapshotDetachedTable final +{ + String detabase; + String table; + UUID uuid = UUIDHelpers::Nil; + String metadata_path; + bool is_permanently{false}; +}; + +class DatabaseDetachedTablesSnapshotIterator +{ +private: + SnapshotDetachedTables snapshot; + SnapshotDetachedTables::iterator it; + +protected: + DatabaseDetachedTablesSnapshotIterator(DatabaseDetachedTablesSnapshotIterator && other) noexcept + { + size_t idx = std::distance(other.snapshot.begin(), other.it); + std::swap(snapshot, other.snapshot); + other.it = other.snapshot.end(); + it = snapshot.begin(); + std::advance(it, idx); + } + +public: + explicit DatabaseDetachedTablesSnapshotIterator(const SnapshotDetachedTables & tables_) : snapshot(tables_), it(snapshot.begin()) { } + + explicit DatabaseDetachedTablesSnapshotIterator(SnapshotDetachedTables && tables_) : snapshot(std::move(tables_)), it(snapshot.begin()) + { + } + + void next() { ++it; } + + bool isValid() const { return it != snapshot.end(); } + + String database() const { return it->second.detabase; } + + String table() const { return it->second.table; } + + UUID uuid() const { return it->second.uuid; } + + String metadataPath() const { return it->second.metadata_path; } + + bool isPermanently() const { return it->second.is_permanently; } +}; + +using DatabaseDetachedTablesSnapshotIteratorPtr = std::unique_ptr; + /** Database engine. * It is responsible for: @@ -232,6 +283,12 @@ public: /// Wait for all tables to be loaded and started up. If `skip_not_loaded` is true, then not yet loaded or not yet started up (at the moment of iterator creation) tables are excluded. virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const = 0; /// NOLINT + virtual DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator( + ContextPtr /*context*/, const FilterByNameFunction & /*filter_by_table_name = {}*/, bool /*skip_not_loaded = false*/) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "There is no get detached tables for Database{}", getEngineName()); + } + /// Returns list of table names. virtual Strings getAllTableNames(ContextPtr context) const { diff --git a/src/Storages/IStorage_fwd.h b/src/Storages/IStorage_fwd.h index b9243b029b0..4cbc586a745 100644 --- a/src/Storages/IStorage_fwd.h +++ b/src/Storages/IStorage_fwd.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -9,9 +10,10 @@ namespace DB { class IStorage; +struct SnapshotDetachedTable; using ConstStoragePtr = std::shared_ptr; using StoragePtr = std::shared_ptr; using Tables = std::map; - +using SnapshotDetachedTables = std::map; } diff --git a/src/Storages/System/ReadFromSystemTables.cpp b/src/Storages/System/ReadFromSystemTables.cpp new file mode 100644 index 00000000000..6e9c6a869d1 --- /dev/null +++ b/src/Storages/System/ReadFromSystemTables.cpp @@ -0,0 +1,138 @@ +#include "ReadFromSystemTables.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables( + const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, const bool need_detached_tables) +{ + Block sample{ + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + if (need_detached_tables) + { + auto table_it = database->getDetachedTablesIterator(context, {}, false); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->table()); + } + } + else + { + auto table_it = database->getTablesIterator(context); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } + } + } + + Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +} + +ReadFromSystemTables::ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_, + const bool need_detached_tables_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , need_detached_tables(need_detached_tables_) +{ +} + +void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context, need_detached_tables); +} + +void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + if (need_detached_tables) + { + pipeline.init(createPipe()); + } + else + { + pipeline.init(createPipe()); + } +} +} diff --git a/src/Storages/System/ReadFromSystemTables.h b/src/Storages/System/ReadFromSystemTables.h new file mode 100644 index 00000000000..f8280ffcc8d --- /dev/null +++ b/src/Storages/System/ReadFromSystemTables.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ReadFromSystemTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_, + bool need_detached_tables); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::vector columns_mask; + size_t max_block_size; + const bool need_detached_tables; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; + + template + Pipe createPipe() + { + return Pipe(std::make_shared( + std::move(columns_mask), + getOutputStream().header, + max_block_size, + std::move(filtered_databases_column), + std::move(filtered_tables_column), + context)); + } +}; +} diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp new file mode 100644 index 00000000000..1596ac6a046 --- /dev/null +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -0,0 +1,56 @@ +#include "StorageSystemDetachedTables.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +StorageSystemDetachedTables::StorageSystemDetachedTables(const StorageID & table_id_) : IStorage(table_id_) +{ + StorageInMemoryMetadata storage_metadata; + + auto description = ColumnsDescription{ + ColumnDescription{"database", std::make_shared(), "The name of the database the table is in."}, + ColumnDescription{"name", std::make_shared(), "Table name."}, + ColumnDescription{"uuid", std::make_shared(), "Table uuid (Atomic database)."}, + ColumnDescription{"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, + ColumnDescription{"is_permanently", std::make_shared(), "Table was detached permanently."}, + }; + + storage_metadata.setColumns(std::move(description)); + + setInMemoryMetadata(storage_metadata); +} + +void StorageSystemDetachedTables::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t max_block_size, + size_t /*num_streams*/) +{ + storage_snapshot->check(column_names); + auto sample_block = storage_snapshot->metadata->getSampleBlock(); + + auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); + + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size, true); + + query_plan.addStep(std::move(reading)); +} +} diff --git a/src/Storages/System/StorageSystemDetachedTables.h b/src/Storages/System/StorageSystemDetachedTables.h new file mode 100644 index 00000000000..cd042f51eaa --- /dev/null +++ b/src/Storages/System/StorageSystemDetachedTables.h @@ -0,0 +1,32 @@ +#pragma once + +#include + + +namespace DB +{ + +class Context; + +/** Implements the system table `detached_tables`, which allows you to get information about detached tables. + */ +class StorageSystemDetachedTables final : public IStorage +{ +public: + explicit StorageSystemDetachedTables(const StorageID & table_id_); + + std::string getName() const override { return "SystemDetachedTables"; } + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & /*query_info*/, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + size_t num_streams) override; + + bool isSystemStorage() const override { return true; } +}; +} diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 783b899c978..5a8d6315d57 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -1,29 +1,31 @@ +#include "StorageSystemTables.h" + +#include #include -#include +#include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include #include #include #include #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include @@ -32,8 +34,7 @@ namespace DB { -StorageSystemTables::StorageSystemTables(const StorageID & table_id_) - : IStorage(table_id_) +StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -46,9 +47,13 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) {"data_paths", std::make_shared(std::make_shared()), "Paths to the table data in the file systems."}, {"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, {"metadata_modification_time", std::make_shared(), "Time of latest modification of the table metadata."}, - {"metadata_version", std::make_shared(), "Metadata version for ReplicatedMergeTree table, 0 for non ReplicatedMergeTree table."}, + {"metadata_version", + std::make_shared(), + "Metadata version for ReplicatedMergeTree table, 0 for non ReplicatedMergeTree table."}, {"dependencies_database", std::make_shared(std::make_shared()), "Database dependencies."}, - {"dependencies_table", std::make_shared(std::make_shared()), "Table dependencies (materialized views the current table)."}, + {"dependencies_table", + std::make_shared(std::make_shared()), + "Table dependencies (materialized views the current table)."}, {"create_table_query", std::make_shared(), "The query that was used to create the table."}, {"engine_full", std::make_shared(), "Parameters of the table engine."}, {"as_select", std::make_shared(), "SELECT query for view."}, @@ -57,676 +62,54 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) {"primary_key", std::make_shared(), "The primary key expression specified in the table."}, {"sampling_key", std::make_shared(), "The sampling key expression specified in the table."}, {"storage_policy", std::make_shared(), "The storage policy."}, - {"total_rows", std::make_shared(std::make_shared()), - "Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise NULL (including underlying Buffer table)." - }, - {"total_bytes", std::make_shared(std::make_shared()), - "Total number of bytes, if it is possible to quickly determine exact number " - "of bytes for the table on storage, otherwise NULL (does not includes any underlying storage). " - "If the table stores data on disk, returns used space on disk (i.e. compressed). " - "If the table stores data in memory, returns approximated number of used bytes in memory." - }, - {"total_bytes_uncompressed", std::make_shared(std::make_shared()), - "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " - "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." - }, + {"total_rows", + std::make_shared(std::make_shared()), + "Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise NULL (including " + "underlying Buffer table)."}, + {"total_bytes", + std::make_shared(std::make_shared()), + "Total number of bytes, if it is possible to quickly determine exact number " + "of bytes for the table on storage, otherwise NULL (does not includes any underlying storage). " + "If the table stores data on disk, returns used space on disk (i.e. compressed). " + "If the table stores data in memory, returns approximated number of used bytes in memory."}, + {"total_bytes_uncompressed", + std::make_shared(std::make_shared()), + "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " + "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into " + "account)."}, {"parts", std::make_shared(std::make_shared()), "The total number of parts in this table."}, - {"active_parts", std::make_shared(std::make_shared()), "The number of active parts in this table."}, - {"total_marks", std::make_shared(std::make_shared()), "The total number of marks in all parts in this table."}, - {"lifetime_rows", std::make_shared(std::make_shared()), - "Total number of rows INSERTed since server start (only for Buffer tables)." - }, - {"lifetime_bytes", std::make_shared(std::make_shared()), - "Total number of bytes INSERTed since server start (only for Buffer tables)." - }, + {"active_parts", + std::make_shared(std::make_shared()), + "The number of active parts in this table."}, + {"total_marks", + std::make_shared(std::make_shared()), + "The total number of marks in all parts in this table."}, + {"lifetime_rows", + std::make_shared(std::make_shared()), + "Total number of rows INSERTed since server start (only for Buffer tables)."}, + {"lifetime_bytes", + std::make_shared(std::make_shared()), + "Total number of bytes INSERTed since server start (only for Buffer tables)."}, {"comment", std::make_shared(), "The comment for the table."}, - {"has_own_data", std::make_shared(), - "Flag that indicates whether the table itself stores some data on disk or only accesses some other source." - }, - {"loading_dependencies_database", std::make_shared(std::make_shared()), - "Database loading dependencies (list of objects which should be loaded before the current object)." - }, - {"loading_dependencies_table", std::make_shared(std::make_shared()), - "Table loading dependencies (list of objects which should be loaded before the current object)." - }, - {"loading_dependent_database", std::make_shared(std::make_shared()), - "Dependent loading database." - }, - {"loading_dependent_table", std::make_shared(std::make_shared()), - "Dependent loading table." - }, + {"has_own_data", + std::make_shared(), + "Flag that indicates whether the table itself stores some data on disk or only accesses some other source."}, + {"loading_dependencies_database", + std::make_shared(std::make_shared()), + "Database loading dependencies (list of objects which should be loaded before the current object)."}, + {"loading_dependencies_table", + std::make_shared(std::make_shared()), + "Table loading dependencies (list of objects which should be loaded before the current object)."}, + {"loading_dependent_database", std::make_shared(std::make_shared()), "Dependent loading database."}, + {"loading_dependent_table", std::make_shared(std::make_shared()), "Dependent loading table."}, }; - description.setAliases({ - {"table", std::make_shared(), "name"} - }); + description.setAliases({{"table", std::make_shared(), "name"}}); storage_metadata.setColumns(std::move(description)); setInMemoryMetadata(storage_metadata); } - -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) -{ - Block sample { - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine") - }; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } - } - - Block block {ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -/// Avoid heavy operation on tables if we only queried columns that we can get without table object. -/// Otherwise it will require table initialization for Lazy database. -bool needTable(const DatabasePtr & database, const Block & header) -{ - if (database->getEngineName() != "Lazy") - return true; - - static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; - for (const auto & column : header.getColumnsWithTypeAndName()) - { - if (columns_without_table.find(column.name) == columns_without_table.end()) - return true; - } - return false; -} - - -class TablesBlockSource : public ISource -{ -public: - TablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = tables_->size(); - tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - tables.insert(tables_->getDataAt(idx).toString()); - } - - String getName() const override { return "Tables"; } - -protected: - Chunk generate() override - { - if (done) - return {}; - - MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - size_t rows_count = 0; - while (rows_count < max_block_size) - { - if (tables_it && !tables_it->isValid()) - ++database_idx; - - while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - { - /// Database was deleted just now or the user has no access. - ++database_idx; - continue; - } - - break; - } - - /// This is for temporary tables. They are output in single block regardless to max_block_size. - if (database_idx >= databases->size()) - { - if (context->hasSessionContext()) - { - Tables external_tables = context->getSessionContext()->getExternalTables(); - - for (auto & table : external_tables) - { - size_t src_index = 0; - size_t res_index = 0; - - // database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // name - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.first); - - // uuid - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getStorageID().uuid); - - // engine - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - // is_temporary - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(1u); - - // data_paths - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_path - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_modification_time - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_version - // Temporary tables does not support replication - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_table - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // create_table_query - if (columns_mask[src_index++]) - { - auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); - ASTPtr ast = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - } - - // engine_full - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - const auto & settings = context->getSettingsRef(); - while (src_index < columns_mask.size()) - { - // total_rows - if (src_index == 19 && columns_mask[src_index]) - { - if (auto total_rows = table.second->totalRows(settings)) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - // total_bytes - else if (src_index == 20 && columns_mask[src_index]) - { - if (auto total_bytes = table.second->totalBytes(settings)) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - /// Fill the rest columns with defaults - else if (columns_mask[src_index]) - res_columns[res_index++]->insertDefault(); - src_index++; - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - done = true; - return Chunk(std::move(res_columns), num_rows); - } - - const bool need_to_check_access_for_tables = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!tables_it || !tables_it->isValid()) - tables_it = database->getTablesIterator(context); - - const bool need_table = needTable(database, getPort().getHeader()); - - for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) - { - auto table_name = tables_it->name(); - if (!tables.contains(table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) - continue; - - StoragePtr table = nullptr; - TableLockHolder lock; - if (need_table) - { - table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) - { - lock = table->tryLockForShare(context->getCurrentQueryId(), - context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table - continue; - } - } - ++rows_count; - - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(tables_it->uuid()); - - if (columns_mask[src_index++]) - { - chassert(table != nullptr); - res_columns[res_index++]->insert(table->getName()); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(0u); // is_temporary - - if (columns_mask[src_index++]) - { - chassert(lock != nullptr); - Array table_paths_array; - auto paths = table->getDataPaths(); - table_paths_array.reserve(paths.size()); - for (const String & path : paths) - table_paths_array.push_back(path); - res_columns[res_index++]->insert(table_paths_array); - /// We don't need the lock anymore - lock = nullptr; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); - - StorageMetadataPtr metadata_snapshot; - if (table) - metadata_snapshot = table->getInMemoryMetadataPtr(); - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && table->supportsReplication()) - res_columns[res_index++]->insert(metadata_snapshot->metadata_version); - else - res_columns[res_index++]->insertDefault(); - } - - { - Array views_table_name_array; - Array views_database_name_array; - if (columns_mask[src_index] || columns_mask[src_index + 1]) - { - const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); - - views_table_name_array.reserve(view_ids.size()); - views_database_name_array.reserve(view_ids.size()); - for (const auto & view_id : view_ids) - { - views_table_name_array.push_back(view_id.table_name); - views_database_name_array.push_back(view_id.database_name); - } - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_database_name_array); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_table_name_array); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) - { - ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); - auto * ast_create = ast ? ast->as() : nullptr; - - if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) - { - ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - - if (columns_mask[src_index++]) - { - String engine_full; - - if (ast_create && ast_create->storage) - { - engine_full = format({context, *ast_create->storage}); - - static const char * const extra_head = " ENGINE = "; - if (startsWith(engine_full, extra_head)) - engine_full = engine_full.substr(strlen(extra_head)); - } - - res_columns[res_index++]->insert(engine_full); - } - - if (columns_mask[src_index++]) - { - String as_select; - if (ast_create && ast_create->select) - as_select = format({context, *ast_create->select}); - res_columns[res_index++]->insert(as_select); - } - } - else - src_index += 3; - - ASTPtr expression_ptr; - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto policy = table ? table->getStoragePolicy() : nullptr; - if (policy) - res_columns[res_index++]->insert(policy->getName()); - else - res_columns[res_index++]->insertDefault(); - } - - auto settings = context->getSettingsRef(); - settings.select_sequential_consistency = 0; - if (columns_mask[src_index++]) - { - auto total_rows = table ? table->totalRows(settings) : std::nullopt; - if (total_rows) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes = table->totalBytes(settings); - if (total_bytes) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); - if (total_bytes_uncompressed) - res_columns[res_index++]->insert(*total_bytes_uncompressed); - else - res_columns[res_index++]->insertDefault(); - } - - auto table_merge_tree = std::dynamic_pointer_cast(table); - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - { - res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); - } - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; - if (lifetime_rows) - res_columns[res_index++]->insert(*lifetime_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; - if (lifetime_bytes) - res_columns[res_index++]->insert(*lifetime_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot) - res_columns[res_index++]->insert(metadata_snapshot->comment); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table) - res_columns[res_index++]->insert(table->storesDataOnDisk()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) - { - auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); - auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); - - Array dependencies_databases; - Array dependencies_tables; - dependencies_databases.reserve(dependencies.size()); - dependencies_tables.reserve(dependencies.size()); - for (const auto & dependency : dependencies) - { - dependencies_databases.push_back(dependency.database_name); - dependencies_tables.push_back(dependency.table_name); - } - - Array dependents_databases; - Array dependents_tables; - dependents_databases.reserve(dependents.size()); - dependents_tables.reserve(dependents.size()); - for (const auto & dependent : dependents) - { - dependents_databases.push_back(dependent.database_name); - dependents_tables.push_back(dependent.table_name); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_tables); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_tables); - - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - return Chunk(std::move(res_columns), num_rows); - } -private: - std::vector columns_mask; - UInt64 max_block_size; - ColumnPtr databases; - NameSet tables; - size_t database_idx = 0; - DatabaseTablesIteratorPtr tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; -}; - -} - -class ReadFromSystemTables : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromSystemTables"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - { - } - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - -private: - std::vector columns_mask; - size_t max_block_size; - - ColumnPtr filtered_databases_column; - ColumnPtr filtered_tables_column; -}; - void StorageSystemTables::read( QueryPlan & query_plan, const Names & column_names, @@ -743,28 +126,8 @@ void StorageSystemTables::read( auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto reading = std::make_unique( - column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size); + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size, false); query_plan.addStep(std::move(reading)); } - -void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); -} - -void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - Pipe pipe(std::make_shared( - std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context)); - pipeline.init(std::move(pipe)); -} - } diff --git a/src/Storages/System/TablesBlockSource.cpp b/src/Storages/System/TablesBlockSource.cpp new file mode 100644 index 00000000000..4ea5bb53deb --- /dev/null +++ b/src/Storages/System/TablesBlockSource.cpp @@ -0,0 +1,569 @@ +#include "TablesBlockSource.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + +} + +Chunk TablesBlockSource::generate() +{ + if (done) + return {}; + + MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + size_t rows_count = 0; + while (rows_count < max_block_size) + { + if (tables_it && !tables_it->isValid()) + ++database_idx; + + while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + { + /// Database was deleted just now or the user has no access. + ++database_idx; + continue; + } + + break; + } + + /// This is for temporary tables. They are output in single block regardless to max_block_size. + if (database_idx >= databases->size()) + { + if (context->hasSessionContext()) + { + Tables external_tables = context->getSessionContext()->getExternalTables(); + + for (auto & table : external_tables) + { + size_t src_index = 0; + size_t res_index = 0; + + // database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // name + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.first); + + // uuid + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getStorageID().uuid); + + // engine + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + // is_temporary + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(1u); + + // data_paths + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_path + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_modification_time + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_version + // Temporary tables does not support replication + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_table + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // create_table_query + if (columns_mask[src_index++]) + { + auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); + ASTPtr ast + = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + } + + // engine_full + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + const auto & settings = context->getSettingsRef(); + while (src_index < columns_mask.size()) + { + // total_rows + if (src_index == 19 && columns_mask[src_index]) + { + if (auto total_rows = table.second->totalRows(settings)) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + // total_bytes + else if (src_index == 20 && columns_mask[src_index]) + { + if (auto total_bytes = table.second->totalBytes(settings)) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + /// Fill the rest columns with defaults + else if (columns_mask[src_index]) + res_columns[res_index++]->insertDefault(); + src_index++; + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + done = true; + return Chunk(std::move(res_columns), num_rows); + } + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!tables_it || !tables_it->isValid()) + tables_it = database->getTablesIterator(context); + + const bool need_table = needTable(database, getPort().getHeader()); + + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) + { + auto table_name = tables_it->name(); + if (!tables.contains(table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + StoragePtr table = nullptr; + TableLockHolder lock; + if (need_table) + { + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) + continue; + + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } + ++rows_count; + + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(tables_it->uuid()); + + if (columns_mask[src_index++]) + { + chassert(table != nullptr); + res_columns[res_index++]->insert(table->getName()); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(0u); // is_temporary + + if (columns_mask[src_index++]) + { + chassert(lock != nullptr); + Array table_paths_array; + auto paths = table->getDataPaths(); + table_paths_array.reserve(paths.size()); + for (const String & path : paths) + table_paths_array.push_back(path); + res_columns[res_index++]->insert(table_paths_array); + /// We don't need the lock anymore + lock = nullptr; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); + + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && table->supportsReplication()) + res_columns[res_index++]->insert(metadata_snapshot->metadata_version); + else + res_columns[res_index++]->insertDefault(); + } + + { + Array views_table_name_array; + Array views_database_name_array; + if (columns_mask[src_index] || columns_mask[src_index + 1]) + { + const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); + + views_table_name_array.reserve(view_ids.size()); + views_database_name_array.reserve(view_ids.size()); + for (const auto & view_id : view_ids) + { + views_table_name_array.push_back(view_id.table_name); + views_database_name_array.push_back(view_id.database_name); + } + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_database_name_array); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_table_name_array); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) + { + ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); + auto * ast_create = ast ? ast->as() : nullptr; + + if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) + { + ast_create->uuid = UUIDHelpers::Nil; + ast_create->to_inner_uuid = UUIDHelpers::Nil; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + + if (columns_mask[src_index++]) + { + String engine_full; + + if (ast_create && ast_create->storage) + { + engine_full = format({context, *ast_create->storage}); + + static const char * const extra_head = " ENGINE = "; + if (startsWith(engine_full, extra_head)) + engine_full = engine_full.substr(strlen(extra_head)); + } + + res_columns[res_index++]->insert(engine_full); + } + + if (columns_mask[src_index++]) + { + String as_select; + if (ast_create && ast_create->select) + as_select = format({context, *ast_create->select}); + res_columns[res_index++]->insert(as_select); + } + } + else + src_index += 3; + + ASTPtr expression_ptr; + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto policy = table ? table->getStoragePolicy() : nullptr; + if (policy) + res_columns[res_index++]->insert(policy->getName()); + else + res_columns[res_index++]->insertDefault(); + } + + auto settings = context->getSettingsRef(); + settings.select_sequential_consistency = 0; + if (columns_mask[src_index++]) + { + auto total_rows = table ? table->totalRows(settings) : std::nullopt; + if (total_rows) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes = table->totalBytes(settings); + if (total_bytes) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); + if (total_bytes_uncompressed) + res_columns[res_index++]->insert(*total_bytes_uncompressed); + else + res_columns[res_index++]->insertDefault(); + } + + auto table_merge_tree = std::dynamic_pointer_cast(table); + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + { + res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); + } + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; + if (lifetime_rows) + res_columns[res_index++]->insert(*lifetime_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; + if (lifetime_bytes) + res_columns[res_index++]->insert(*lifetime_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot) + res_columns[res_index++]->insert(metadata_snapshot->comment); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table) + res_columns[res_index++]->insert(table->storesDataOnDisk()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) + { + auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); + auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); + + Array dependencies_databases; + Array dependencies_tables; + dependencies_databases.reserve(dependencies.size()); + dependencies_tables.reserve(dependencies.size()); + for (const auto & dependency : dependencies) + { + dependencies_databases.push_back(dependency.database_name); + dependencies_tables.push_back(dependency.table_name); + } + + Array dependents_databases; + Array dependents_tables; + dependents_databases.reserve(dependents.size()); + dependents_tables.reserve(dependents.size()); + for (const auto & dependent : dependents) + { + dependents_databases.push_back(dependent.database_name); + dependents_tables.push_back(dependent.table_name); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_tables); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_tables); + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + return Chunk(std::move(res_columns), num_rows); +} + +Chunk DetachedTablesBlockSource::generate() +{ + if (done) + return {}; + + MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + { + LOG_DEBUG(lg, "Database was deleted just now or the user has no access"); + continue; + } + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!detached_tables_it || !detached_tables_it->isValid()) + detached_tables_it = database->getDetachedTablesIterator(context, {}, false); + + for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) + { + const auto detached_table_name = detached_tables_it->table(); + LOG_DEBUG(lg, "detached_table_name={}", detached_table_name); + + if (!detached_tables.contains(detached_table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) + continue; + + fillResultColumnsByDetachedTableIterator(result_columns); + ++rows_count; + } + } + + const UInt64 num_rows = result_columns.at(0)->size(); + done = true; + return Chunk(std::move(result_columns), num_rows); +} + +void DetachedTablesBlockSource::fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const +{ + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->database()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->table()); + + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->uuid()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->metadataPath()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->isPermanently()); +} +} diff --git a/src/Storages/System/TablesBlockSource.h b/src/Storages/System/TablesBlockSource.h new file mode 100644 index 00000000000..9698fdd494c --- /dev/null +++ b/src/Storages/System/TablesBlockSource.h @@ -0,0 +1,96 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class TablesBlockSource : public ISource +{ +public: + TablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = tables_->size(); + tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + tables.insert(tables_->getDataAt(idx).toString()); + } + + String getName() const override { return "Tables"; } + +protected: + Chunk generate() override; + +private: + std::vector columns_mask; + UInt64 max_block_size; + ColumnPtr databases; + NameSet tables; + size_t database_idx = 0; + DatabaseTablesIteratorPtr tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; +}; + +class DetachedTablesBlockSource : public ISource +{ +public: + DetachedTablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr detached_tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = detached_tables_->size(); + detached_tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + { + detached_tables.insert(detached_tables_->getDataAt(idx).toString()); + } + } + + String getName() const override { return "DetachedTables"; } + +protected: + Chunk generate() override; + +private: + const std::vector columns_mask; + const UInt64 max_block_size; + const ColumnPtr databases; + NameSet detached_tables; + DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; + + // temp log for debug + LoggerPtr lg = getLogger("DetachedTablesBlockSource"); + + void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const; +}; +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 6ff86b26ca9..97eda1db3fa 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attachNoDescription(context, system_database, "zeros_mt", "Multithreaded version of system.zeros.", true); attach(context, system_database, "databases", "Lists all databases of the current server."); attachNoDescription(context, system_database, "tables", "Lists all tables of the current server."); + attachNoDescription(context, system_database, "detached_tables", "Lists all detached tables of the current server."); attachNoDescription(context, system_database, "columns", "Lists all columns from all tables of the current server."); attach(context, system_database, "functions", "Contains a list of all available ordinary and aggregate functions with their descriptions."); attach(context, system_database, "events", "Contains profiling events and their current value."); diff --git a/tests/integration/test_system_detached_tables/__init__.py b/tests/integration/test_system_detached_tables/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py new file mode 100644 index 00000000000..2e0165deba2 --- /dev/null +++ b/tests/integration/test_system_detached_tables/test.py @@ -0,0 +1,38 @@ +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node_default", stay_alive=True) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_system_detached_tables(): + node.query("CREATE TABLE test_table (n Int64) ENGINE=MergeTree ORDER BY n;") + node.query("CREATE TABLE test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n;") + + result = node.query("SELECT * FROM system.detached_tables") + assert result == "" + + node.query("DETACH TABLE test_table") + node.query("DETACH TABLE test_table_perm PERMANENTLY") + + result = node.query("SELECT name FROM system.detached_tables") + assert result == "test_table\ntest_table_perm\n" + + node.restart_clickhouse() + + result = node.query("SELECT name FROM system.detached_tables") + assert result == "test_table_perm\n" + + node.restart_clickhouse() + + result = node.query("SELECT name FROM system.detached_tables") + assert result == "test_table_perm\n" diff --git a/tests/queries/0_stateless/03172_system_detached_tables.reference b/tests/queries/0_stateless/03172_system_detached_tables.reference new file mode 100644 index 00000000000..1cf9e0275c9 --- /dev/null +++ b/tests/queries/0_stateless/03172_system_detached_tables.reference @@ -0,0 +1,5 @@ +test_system_detached_tables test_table 0 +test_system_detached_tables test_table_perm 1 +test_system_detached_tables test_table 0 +test_system_detached_tables test_table_perm 1 +test_system_detached_tables test_table 0 diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql new file mode 100644 index 00000000000..13465d02b80 --- /dev/null +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -0,0 +1,28 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS test_system_detached_tables; +CREATE DATABASE IF NOT EXISTS test_system_detached_tables; + +CREATE TABLE test_system_detached_tables.test_table (n Int64) ENGINE=MergeTree ORDER BY n; +SELECT * FROM system.detached_tables; + +DETACH TABLE test_system_detached_tables.test_table; +SELECT database, name, is_permanently FROM system.detached_tables; + +ATTACH TABLE test_system_detached_tables.test_table; + +CREATE TABLE test_system_detached_tables.test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n; +SELECT * FROM system.detached_tables; + +DETACH TABLE test_system_detached_tables.test_table_perm PERMANENTLY; +SELECT database, name, is_permanently FROM system.detached_tables; + +DETACH TABLE test_system_detached_tables.test_table SYNC; +SELECT database, name, is_permanently FROM system.detached_tables; + +SELECT database, name, is_permanently FROM system.detached_tables WHERE name='test_table'; + +DROP DATABASE test_system_detached_tables; + + + From 21936f32a655b1008b6233ee274549b0ce2b33c7 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 21 Jun 2024 12:32:54 +0000 Subject: [PATCH 02/64] fix style --- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/DatabasesCommon.cpp | 4 ++-- src/Databases/IDatabase.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 046cbfa40be..ac40bf4e90e 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -243,7 +243,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables const auto storage_id = table->getStorageID(); SnapshotDetachedTable snapshot_detached_table; - snapshot_detached_table.detabase = storage_id.getDatabaseName(); + snapshot_detached_table.database = storage_id.getDatabaseName(); snapshot_detached_table.table = detached_table_name; snapshot_detached_table.uuid = storage_id.uuid; snapshot_detached_table.is_permanently = true; diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 312c0a4477f..c4cad663878 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -250,7 +250,7 @@ DatabaseDetachedTablesSnapshotIteratorPtr DatabaseWithOwnTablesBase::getDetached if (filter_by_table_name(table_name)) { SnapshotDetachedTable snapshot_detached_table; - snapshot_detached_table.detabase = storage->getStorageID().getDatabaseName(); + snapshot_detached_table.database = storage->getStorageID().getDatabaseName(); snapshot_detached_table.table = table_name; if (storage->getStorageID().hasUUID()) { @@ -290,7 +290,7 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; SnapshotDetachedTable snapshot_detached_table; - snapshot_detached_table.detabase = it->second->getStorageID().getDatabaseName(); + snapshot_detached_table.database = it->second->getStorageID().getDatabaseName(); snapshot_detached_table.table = it->first; if (it->second->getStorageID().hasUUID()) { diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 7c6b3b3004c..2ce4650798c 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -114,7 +114,7 @@ using DatabaseTablesIteratorPtr = std::unique_ptr; struct SnapshotDetachedTable final { - String detabase; + String database; String table; UUID uuid = UUIDHelpers::Nil; String metadata_path; @@ -148,7 +148,7 @@ public: bool isValid() const { return it != snapshot.end(); } - String database() const { return it->second.detabase; } + String database() const { return it->second.database; } String table() const { return it->second.table; } From 7aa7b7977221d5bc9b65d0e09d272ffca96acddc Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 21 Jun 2024 16:33:09 +0000 Subject: [PATCH 03/64] refactor --- .../system-tables/detached_tables.md | 9 +- src/Databases/DatabaseAtomic.h | 1 - src/Databases/DatabaseOrdinary.cpp | 6 +- src/Storages/System/ReadFromSystemTables.cpp | 138 ---- src/Storages/System/ReadFromSystemTables.h | 47 -- .../System/StorageSystemDetachedTables.cpp | 252 +++++- src/Storages/System/StorageSystemTables.cpp | 761 ++++++++++++++++-- src/Storages/System/TablesBlockSource.cpp | 569 ------------- src/Storages/System/TablesBlockSource.h | 96 --- .../test_system_detached_tables/test.py | 6 +- .../03172_system_detached_tables.sql | 8 +- 11 files changed, 959 insertions(+), 934 deletions(-) delete mode 100644 src/Storages/System/ReadFromSystemTables.cpp delete mode 100644 src/Storages/System/ReadFromSystemTables.h delete mode 100644 src/Storages/System/TablesBlockSource.cpp delete mode 100644 src/Storages/System/TablesBlockSource.h diff --git a/docs/en/operations/system-tables/detached_tables.md b/docs/en/operations/system-tables/detached_tables.md index d86e58d2899..f669c8fd251 100644 --- a/docs/en/operations/system-tables/detached_tables.md +++ b/docs/en/operations/system-tables/detached_tables.md @@ -9,14 +9,14 @@ Columns: - `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. -- `name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. - `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). -- `is_permanently` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag indicates that the table was detached PERMANENTLY. - - `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. +- `is_permanently` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag indicates that the table was detached PERMANENTLY. + **Example** @@ -28,9 +28,8 @@ SELECT * FROM system.detached_tables FORMAT Vertical; Row 1: ────── database: base -name: t1 +table: t1 uuid: 81b1c20a-b7c6-4116-a2ce-7583fb6b6736 -data_paths: ['/var/lib/clickhouse/store/81b/81b1c20a-b7c6-4116-a2ce-7583fb6b6736/'] metadata_path: /var/lib/clickhouse/store/461/461cf698-fd0b-406d-8c01-5d8fd5748a91/t1.sql is_permanently: 1 ``` diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 47afc4dc5ef..25c5cdfbde1 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -85,7 +85,6 @@ protected: //TODO store path in DatabaseWithOwnTables::tables using NameToPathMap = std::unordered_map; - NameToPathMap table_name_to_path TSA_GUARDED_BY(mutex); DetachedTables detached_tables TSA_GUARDED_BY(mutex); diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index ac40bf4e90e..7086f6e628e 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -187,7 +187,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables size_t prev_tables_count = metadata.parsed_tables.size(); size_t prev_total_dictionaries = metadata.total_dictionaries; - auto process_metadata = [&metadata, is_startup, local_context, this](const String & file_name) mutable + auto process_metadata = [&metadata, is_startup, local_context, this](const String & file_name) { fs::path path(getMetadataPath()); fs::path file_path(file_name); @@ -227,7 +227,6 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables permanently_detached_tables.push_back(table_name); LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name)); - // @TODO refactoring auto parsed_table_metadata = ParsedTableMetadata{full_path.string(), ast}; const auto & query = parsed_table_metadata.ast->as(); @@ -249,10 +248,9 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables snapshot_detached_table.is_permanently = true; snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); - snapshot_detached_tables.emplace(detached_table_name, std::move(snapshot_detached_table)); - LOG_TRACE(log, "Add detached table {} to system.detached_tables", detached_table_name); + LOG_TRACE(log, "Add permanently detached table {} to system.detached_tables", detached_table_name); return; } diff --git a/src/Storages/System/ReadFromSystemTables.cpp b/src/Storages/System/ReadFromSystemTables.cpp deleted file mode 100644 index 6e9c6a869d1..00000000000 --- a/src/Storages/System/ReadFromSystemTables.cpp +++ /dev/null @@ -1,138 +0,0 @@ -#include "ReadFromSystemTables.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ - -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables( - const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, const bool need_detached_tables) -{ - Block sample{ - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - if (need_detached_tables) - { - auto table_it = database->getDetachedTablesIterator(context, {}, false); - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->table()); - } - } - else - { - auto table_it = database->getTablesIterator(context); - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } - } - } - - Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -} - -ReadFromSystemTables::ReadFromSystemTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_, - const bool need_detached_tables_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , need_detached_tables(need_detached_tables_) -{ -} - -void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context, need_detached_tables); -} - -void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - if (need_detached_tables) - { - pipeline.init(createPipe()); - } - else - { - pipeline.init(createPipe()); - } -} -} diff --git a/src/Storages/System/ReadFromSystemTables.h b/src/Storages/System/ReadFromSystemTables.h deleted file mode 100644 index f8280ffcc8d..00000000000 --- a/src/Storages/System/ReadFromSystemTables.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ReadFromSystemTables : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromSystemTables"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_, - bool need_detached_tables); - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - -private: - std::vector columns_mask; - size_t max_block_size; - const bool need_detached_tables; - - ColumnPtr filtered_databases_column; - ColumnPtr filtered_tables_column; - - template - Pipe createPipe() - { - return Pipe(std::make_shared( - std::move(columns_mask), - getOutputStream().header, - max_block_size, - std::move(filtered_databases_column), - std::move(filtered_tables_column), - context)); - } -}; -} diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp index 1596ac6a046..9805fa2a8cf 100644 --- a/src/Storages/System/StorageSystemDetachedTables.cpp +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -1,28 +1,232 @@ #include "StorageSystemDetachedTables.h" +#include #include #include #include #include +#include +#include #include +#include +#include #include #include #include -#include -#include #include +#include + +#include namespace DB { +namespace +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) +{ + Block sample{ + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + auto table_it = database->getDetachedTablesIterator(context, {}, false); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->table()); + } + } + + Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +class DetachedTablesBlockSource : public ISource +{ +public: + DetachedTablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr detached_tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = detached_tables_->size(); + detached_tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + { + detached_tables.insert(detached_tables_->getDataAt(idx).toString()); + } + } + + String getName() const override { return "DetachedTables"; } + +protected: + Chunk generate() override + { + if (done) + return {}; + + MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + continue; + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!detached_tables_it || !detached_tables_it->isValid()) + detached_tables_it = database->getDetachedTablesIterator(context, {}, false); + + for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) + { + const auto detached_table_name = detached_tables_it->table(); + + if (!detached_tables.contains(detached_table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) + continue; + + fillResultColumnsByDetachedTableIterator(result_columns); + ++rows_count; + } + } + + const UInt64 num_rows = result_columns.at(0)->size(); + done = true; + return Chunk(std::move(result_columns), num_rows); + } + +private: + const std::vector columns_mask; + const UInt64 max_block_size; + const ColumnPtr databases; + NameSet detached_tables; + DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; + + void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const + { + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->database()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->table()); + + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->uuid()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->metadataPath()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->isPermanently()); + } +}; + +} + +class ReadFromSystemDetachedTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemDetachedTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::vector columns_mask; + size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; +}; + StorageSystemDetachedTables::StorageSystemDetachedTables(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; auto description = ColumnsDescription{ ColumnDescription{"database", std::make_shared(), "The name of the database the table is in."}, - ColumnDescription{"name", std::make_shared(), "Table name."}, + ColumnDescription{"table", std::make_shared(), "Table name."}, ColumnDescription{"uuid", std::make_shared(), "Table uuid (Atomic database)."}, ColumnDescription{"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, ColumnDescription{"is_permanently", std::make_shared(), "Table was detached permanently."}, @@ -48,9 +252,47 @@ void StorageSystemDetachedTables::read( auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); - auto reading = std::make_unique( - column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size, true); + auto reading = std::make_unique( + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size); query_plan.addStep(std::move(reading)); } + +ReadFromSystemDetachedTables::ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) +{ +} + +void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); +} + +void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto pipe = Pipe(std::make_shared( + std::move(columns_mask), + getOutputStream().header, + max_block_size, + std::move(filtered_databases_column), + std::move(filtered_tables_column), + context)); + pipeline.init(std::move(pipe)); +} } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 5a8d6315d57..783b899c978 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -1,31 +1,29 @@ -#include "StorageSystemTables.h" - -#include #include -#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include #include @@ -34,7 +32,8 @@ namespace DB { -StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) +StorageSystemTables::StorageSystemTables(const StorageID & table_id_) + : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -47,13 +46,9 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage {"data_paths", std::make_shared(std::make_shared()), "Paths to the table data in the file systems."}, {"metadata_path", std::make_shared(), "Path to the table metadata in the file system."}, {"metadata_modification_time", std::make_shared(), "Time of latest modification of the table metadata."}, - {"metadata_version", - std::make_shared(), - "Metadata version for ReplicatedMergeTree table, 0 for non ReplicatedMergeTree table."}, + {"metadata_version", std::make_shared(), "Metadata version for ReplicatedMergeTree table, 0 for non ReplicatedMergeTree table."}, {"dependencies_database", std::make_shared(std::make_shared()), "Database dependencies."}, - {"dependencies_table", - std::make_shared(std::make_shared()), - "Table dependencies (materialized views the current table)."}, + {"dependencies_table", std::make_shared(std::make_shared()), "Table dependencies (materialized views the current table)."}, {"create_table_query", std::make_shared(), "The query that was used to create the table."}, {"engine_full", std::make_shared(), "Parameters of the table engine."}, {"as_select", std::make_shared(), "SELECT query for view."}, @@ -62,54 +57,676 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage {"primary_key", std::make_shared(), "The primary key expression specified in the table."}, {"sampling_key", std::make_shared(), "The sampling key expression specified in the table."}, {"storage_policy", std::make_shared(), "The storage policy."}, - {"total_rows", - std::make_shared(std::make_shared()), - "Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise NULL (including " - "underlying Buffer table)."}, - {"total_bytes", - std::make_shared(std::make_shared()), - "Total number of bytes, if it is possible to quickly determine exact number " - "of bytes for the table on storage, otherwise NULL (does not includes any underlying storage). " - "If the table stores data on disk, returns used space on disk (i.e. compressed). " - "If the table stores data in memory, returns approximated number of used bytes in memory."}, - {"total_bytes_uncompressed", - std::make_shared(std::make_shared()), - "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " - "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into " - "account)."}, + {"total_rows", std::make_shared(std::make_shared()), + "Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise NULL (including underlying Buffer table)." + }, + {"total_bytes", std::make_shared(std::make_shared()), + "Total number of bytes, if it is possible to quickly determine exact number " + "of bytes for the table on storage, otherwise NULL (does not includes any underlying storage). " + "If the table stores data on disk, returns used space on disk (i.e. compressed). " + "If the table stores data in memory, returns approximated number of used bytes in memory." + }, + {"total_bytes_uncompressed", std::make_shared(std::make_shared()), + "Total number of uncompressed bytes, if it's possible to quickly determine the exact number " + "of bytes from the part checksums for the table on storage, otherwise NULL (does not take underlying storage (if any) into account)." + }, {"parts", std::make_shared(std::make_shared()), "The total number of parts in this table."}, - {"active_parts", - std::make_shared(std::make_shared()), - "The number of active parts in this table."}, - {"total_marks", - std::make_shared(std::make_shared()), - "The total number of marks in all parts in this table."}, - {"lifetime_rows", - std::make_shared(std::make_shared()), - "Total number of rows INSERTed since server start (only for Buffer tables)."}, - {"lifetime_bytes", - std::make_shared(std::make_shared()), - "Total number of bytes INSERTed since server start (only for Buffer tables)."}, + {"active_parts", std::make_shared(std::make_shared()), "The number of active parts in this table."}, + {"total_marks", std::make_shared(std::make_shared()), "The total number of marks in all parts in this table."}, + {"lifetime_rows", std::make_shared(std::make_shared()), + "Total number of rows INSERTed since server start (only for Buffer tables)." + }, + {"lifetime_bytes", std::make_shared(std::make_shared()), + "Total number of bytes INSERTed since server start (only for Buffer tables)." + }, {"comment", std::make_shared(), "The comment for the table."}, - {"has_own_data", - std::make_shared(), - "Flag that indicates whether the table itself stores some data on disk or only accesses some other source."}, - {"loading_dependencies_database", - std::make_shared(std::make_shared()), - "Database loading dependencies (list of objects which should be loaded before the current object)."}, - {"loading_dependencies_table", - std::make_shared(std::make_shared()), - "Table loading dependencies (list of objects which should be loaded before the current object)."}, - {"loading_dependent_database", std::make_shared(std::make_shared()), "Dependent loading database."}, - {"loading_dependent_table", std::make_shared(std::make_shared()), "Dependent loading table."}, + {"has_own_data", std::make_shared(), + "Flag that indicates whether the table itself stores some data on disk or only accesses some other source." + }, + {"loading_dependencies_database", std::make_shared(std::make_shared()), + "Database loading dependencies (list of objects which should be loaded before the current object)." + }, + {"loading_dependencies_table", std::make_shared(std::make_shared()), + "Table loading dependencies (list of objects which should be loaded before the current object)." + }, + {"loading_dependent_database", std::make_shared(std::make_shared()), + "Dependent loading database." + }, + {"loading_dependent_table", std::make_shared(std::make_shared()), + "Dependent loading table." + }, }; - description.setAliases({{"table", std::make_shared(), "name"}}); + description.setAliases({ + {"table", std::make_shared(), "name"} + }); storage_metadata.setColumns(std::move(description)); setInMemoryMetadata(storage_metadata); } + +namespace +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) +{ + Block sample { + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine") + }; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } + } + + Block block {ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + + +class TablesBlockSource : public ISource +{ +public: + TablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = tables_->size(); + tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + tables.insert(tables_->getDataAt(idx).toString()); + } + + String getName() const override { return "Tables"; } + +protected: + Chunk generate() override + { + if (done) + return {}; + + MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + size_t rows_count = 0; + while (rows_count < max_block_size) + { + if (tables_it && !tables_it->isValid()) + ++database_idx; + + while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + { + /// Database was deleted just now or the user has no access. + ++database_idx; + continue; + } + + break; + } + + /// This is for temporary tables. They are output in single block regardless to max_block_size. + if (database_idx >= databases->size()) + { + if (context->hasSessionContext()) + { + Tables external_tables = context->getSessionContext()->getExternalTables(); + + for (auto & table : external_tables) + { + size_t src_index = 0; + size_t res_index = 0; + + // database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // name + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.first); + + // uuid + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getStorageID().uuid); + + // engine + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + // is_temporary + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(1u); + + // data_paths + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_path + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_modification_time + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_version + // Temporary tables does not support replication + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_table + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // create_table_query + if (columns_mask[src_index++]) + { + auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); + ASTPtr ast = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + } + + // engine_full + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + const auto & settings = context->getSettingsRef(); + while (src_index < columns_mask.size()) + { + // total_rows + if (src_index == 19 && columns_mask[src_index]) + { + if (auto total_rows = table.second->totalRows(settings)) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + // total_bytes + else if (src_index == 20 && columns_mask[src_index]) + { + if (auto total_bytes = table.second->totalBytes(settings)) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + /// Fill the rest columns with defaults + else if (columns_mask[src_index]) + res_columns[res_index++]->insertDefault(); + src_index++; + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + done = true; + return Chunk(std::move(res_columns), num_rows); + } + + const bool need_to_check_access_for_tables = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!tables_it || !tables_it->isValid()) + tables_it = database->getTablesIterator(context); + + const bool need_table = needTable(database, getPort().getHeader()); + + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) + { + auto table_name = tables_it->name(); + if (!tables.contains(table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + StoragePtr table = nullptr; + TableLockHolder lock; + if (need_table) + { + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) + continue; + + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), + context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } + ++rows_count; + + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(tables_it->uuid()); + + if (columns_mask[src_index++]) + { + chassert(table != nullptr); + res_columns[res_index++]->insert(table->getName()); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(0u); // is_temporary + + if (columns_mask[src_index++]) + { + chassert(lock != nullptr); + Array table_paths_array; + auto paths = table->getDataPaths(); + table_paths_array.reserve(paths.size()); + for (const String & path : paths) + table_paths_array.push_back(path); + res_columns[res_index++]->insert(table_paths_array); + /// We don't need the lock anymore + lock = nullptr; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); + + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && table->supportsReplication()) + res_columns[res_index++]->insert(metadata_snapshot->metadata_version); + else + res_columns[res_index++]->insertDefault(); + } + + { + Array views_table_name_array; + Array views_database_name_array; + if (columns_mask[src_index] || columns_mask[src_index + 1]) + { + const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); + + views_table_name_array.reserve(view_ids.size()); + views_database_name_array.reserve(view_ids.size()); + for (const auto & view_id : view_ids) + { + views_table_name_array.push_back(view_id.table_name); + views_database_name_array.push_back(view_id.database_name); + } + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_database_name_array); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_table_name_array); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) + { + ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); + auto * ast_create = ast ? ast->as() : nullptr; + + if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) + { + ast_create->uuid = UUIDHelpers::Nil; + ast_create->to_inner_uuid = UUIDHelpers::Nil; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + + if (columns_mask[src_index++]) + { + String engine_full; + + if (ast_create && ast_create->storage) + { + engine_full = format({context, *ast_create->storage}); + + static const char * const extra_head = " ENGINE = "; + if (startsWith(engine_full, extra_head)) + engine_full = engine_full.substr(strlen(extra_head)); + } + + res_columns[res_index++]->insert(engine_full); + } + + if (columns_mask[src_index++]) + { + String as_select; + if (ast_create && ast_create->select) + as_select = format({context, *ast_create->select}); + res_columns[res_index++]->insert(as_select); + } + } + else + src_index += 3; + + ASTPtr expression_ptr; + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto policy = table ? table->getStoragePolicy() : nullptr; + if (policy) + res_columns[res_index++]->insert(policy->getName()); + else + res_columns[res_index++]->insertDefault(); + } + + auto settings = context->getSettingsRef(); + settings.select_sequential_consistency = 0; + if (columns_mask[src_index++]) + { + auto total_rows = table ? table->totalRows(settings) : std::nullopt; + if (total_rows) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes = table->totalBytes(settings); + if (total_bytes) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); + if (total_bytes_uncompressed) + res_columns[res_index++]->insert(*total_bytes_uncompressed); + else + res_columns[res_index++]->insertDefault(); + } + + auto table_merge_tree = std::dynamic_pointer_cast(table); + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + { + res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); + } + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; + if (lifetime_rows) + res_columns[res_index++]->insert(*lifetime_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; + if (lifetime_bytes) + res_columns[res_index++]->insert(*lifetime_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot) + res_columns[res_index++]->insert(metadata_snapshot->comment); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table) + res_columns[res_index++]->insert(table->storesDataOnDisk()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) + { + auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); + auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); + + Array dependencies_databases; + Array dependencies_tables; + dependencies_databases.reserve(dependencies.size()); + dependencies_tables.reserve(dependencies.size()); + for (const auto & dependency : dependencies) + { + dependencies_databases.push_back(dependency.database_name); + dependencies_tables.push_back(dependency.table_name); + } + + Array dependents_databases; + Array dependents_tables; + dependents_databases.reserve(dependents.size()); + dependents_tables.reserve(dependents.size()); + for (const auto & dependent : dependents) + { + dependents_databases.push_back(dependent.database_name); + dependents_tables.push_back(dependent.table_name); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_tables); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_tables); + + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + return Chunk(std::move(res_columns), num_rows); + } +private: + std::vector columns_mask; + UInt64 max_block_size; + ColumnPtr databases; + NameSet tables; + size_t database_idx = 0; + DatabaseTablesIteratorPtr tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; +}; + +} + +class ReadFromSystemTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::vector columns_mask; + size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; +}; + void StorageSystemTables::read( QueryPlan & query_plan, const Names & column_names, @@ -126,8 +743,28 @@ void StorageSystemTables::read( auto [columns_mask, res_block] = getQueriedColumnsMaskAndHeader(sample_block, column_names); auto reading = std::make_unique( - column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size, false); + column_names, query_info, storage_snapshot, context, std::move(res_block), std::move(columns_mask), max_block_size); query_plan.addStep(std::move(reading)); } + +void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); +} + +void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + Pipe pipe(std::make_shared( + std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context)); + pipeline.init(std::move(pipe)); +} + } diff --git a/src/Storages/System/TablesBlockSource.cpp b/src/Storages/System/TablesBlockSource.cpp deleted file mode 100644 index 4ea5bb53deb..00000000000 --- a/src/Storages/System/TablesBlockSource.cpp +++ /dev/null @@ -1,569 +0,0 @@ -#include "TablesBlockSource.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -/// Avoid heavy operation on tables if we only queried columns that we can get without table object. -/// Otherwise it will require table initialization for Lazy database. -bool needTable(const DatabasePtr & database, const Block & header) -{ - if (database->getEngineName() != "Lazy") - return true; - - static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; - for (const auto & column : header.getColumnsWithTypeAndName()) - { - if (columns_without_table.find(column.name) == columns_without_table.end()) - return true; - } - return false; -} - -} - -Chunk TablesBlockSource::generate() -{ - if (done) - return {}; - - MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - size_t rows_count = 0; - while (rows_count < max_block_size) - { - if (tables_it && !tables_it->isValid()) - ++database_idx; - - while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - { - /// Database was deleted just now or the user has no access. - ++database_idx; - continue; - } - - break; - } - - /// This is for temporary tables. They are output in single block regardless to max_block_size. - if (database_idx >= databases->size()) - { - if (context->hasSessionContext()) - { - Tables external_tables = context->getSessionContext()->getExternalTables(); - - for (auto & table : external_tables) - { - size_t src_index = 0; - size_t res_index = 0; - - // database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // name - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.first); - - // uuid - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getStorageID().uuid); - - // engine - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - // is_temporary - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(1u); - - // data_paths - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_path - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_modification_time - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_version - // Temporary tables does not support replication - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_table - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // create_table_query - if (columns_mask[src_index++]) - { - auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); - ASTPtr ast - = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - } - - // engine_full - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - const auto & settings = context->getSettingsRef(); - while (src_index < columns_mask.size()) - { - // total_rows - if (src_index == 19 && columns_mask[src_index]) - { - if (auto total_rows = table.second->totalRows(settings)) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - // total_bytes - else if (src_index == 20 && columns_mask[src_index]) - { - if (auto total_bytes = table.second->totalBytes(settings)) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - /// Fill the rest columns with defaults - else if (columns_mask[src_index]) - res_columns[res_index++]->insertDefault(); - src_index++; - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - done = true; - return Chunk(std::move(res_columns), num_rows); - } - - const bool need_to_check_access_for_tables - = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!tables_it || !tables_it->isValid()) - tables_it = database->getTablesIterator(context); - - const bool need_table = needTable(database, getPort().getHeader()); - - for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) - { - auto table_name = tables_it->name(); - if (!tables.contains(table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) - continue; - - StoragePtr table = nullptr; - TableLockHolder lock; - if (need_table) - { - table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) - { - lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table - continue; - } - } - ++rows_count; - - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(tables_it->uuid()); - - if (columns_mask[src_index++]) - { - chassert(table != nullptr); - res_columns[res_index++]->insert(table->getName()); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(0u); // is_temporary - - if (columns_mask[src_index++]) - { - chassert(lock != nullptr); - Array table_paths_array; - auto paths = table->getDataPaths(); - table_paths_array.reserve(paths.size()); - for (const String & path : paths) - table_paths_array.push_back(path); - res_columns[res_index++]->insert(table_paths_array); - /// We don't need the lock anymore - lock = nullptr; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); - - StorageMetadataPtr metadata_snapshot; - if (table) - metadata_snapshot = table->getInMemoryMetadataPtr(); - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && table->supportsReplication()) - res_columns[res_index++]->insert(metadata_snapshot->metadata_version); - else - res_columns[res_index++]->insertDefault(); - } - - { - Array views_table_name_array; - Array views_database_name_array; - if (columns_mask[src_index] || columns_mask[src_index + 1]) - { - const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); - - views_table_name_array.reserve(view_ids.size()); - views_database_name_array.reserve(view_ids.size()); - for (const auto & view_id : view_ids) - { - views_table_name_array.push_back(view_id.table_name); - views_database_name_array.push_back(view_id.database_name); - } - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_database_name_array); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_table_name_array); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) - { - ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); - auto * ast_create = ast ? ast->as() : nullptr; - - if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) - { - ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - - if (columns_mask[src_index++]) - { - String engine_full; - - if (ast_create && ast_create->storage) - { - engine_full = format({context, *ast_create->storage}); - - static const char * const extra_head = " ENGINE = "; - if (startsWith(engine_full, extra_head)) - engine_full = engine_full.substr(strlen(extra_head)); - } - - res_columns[res_index++]->insert(engine_full); - } - - if (columns_mask[src_index++]) - { - String as_select; - if (ast_create && ast_create->select) - as_select = format({context, *ast_create->select}); - res_columns[res_index++]->insert(as_select); - } - } - else - src_index += 3; - - ASTPtr expression_ptr; - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto policy = table ? table->getStoragePolicy() : nullptr; - if (policy) - res_columns[res_index++]->insert(policy->getName()); - else - res_columns[res_index++]->insertDefault(); - } - - auto settings = context->getSettingsRef(); - settings.select_sequential_consistency = 0; - if (columns_mask[src_index++]) - { - auto total_rows = table ? table->totalRows(settings) : std::nullopt; - if (total_rows) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes = table->totalBytes(settings); - if (total_bytes) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); - if (total_bytes_uncompressed) - res_columns[res_index++]->insert(*total_bytes_uncompressed); - else - res_columns[res_index++]->insertDefault(); - } - - auto table_merge_tree = std::dynamic_pointer_cast(table); - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - { - res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); - } - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; - if (lifetime_rows) - res_columns[res_index++]->insert(*lifetime_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; - if (lifetime_bytes) - res_columns[res_index++]->insert(*lifetime_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot) - res_columns[res_index++]->insert(metadata_snapshot->comment); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table) - res_columns[res_index++]->insert(table->storesDataOnDisk()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) - { - auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); - auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); - - Array dependencies_databases; - Array dependencies_tables; - dependencies_databases.reserve(dependencies.size()); - dependencies_tables.reserve(dependencies.size()); - for (const auto & dependency : dependencies) - { - dependencies_databases.push_back(dependency.database_name); - dependencies_tables.push_back(dependency.table_name); - } - - Array dependents_databases; - Array dependents_tables; - dependents_databases.reserve(dependents.size()); - dependents_tables.reserve(dependents.size()); - for (const auto & dependent : dependents) - { - dependents_databases.push_back(dependent.database_name); - dependents_tables.push_back(dependent.table_name); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_tables); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_tables); - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - return Chunk(std::move(res_columns), num_rows); -} - -Chunk DetachedTablesBlockSource::generate() -{ - if (done) - return {}; - - MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - { - LOG_DEBUG(lg, "Database was deleted just now or the user has no access"); - continue; - } - - const bool need_to_check_access_for_tables - = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!detached_tables_it || !detached_tables_it->isValid()) - detached_tables_it = database->getDetachedTablesIterator(context, {}, false); - - for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) - { - const auto detached_table_name = detached_tables_it->table(); - LOG_DEBUG(lg, "detached_table_name={}", detached_table_name); - - if (!detached_tables.contains(detached_table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) - continue; - - fillResultColumnsByDetachedTableIterator(result_columns); - ++rows_count; - } - } - - const UInt64 num_rows = result_columns.at(0)->size(); - done = true; - return Chunk(std::move(result_columns), num_rows); -} - -void DetachedTablesBlockSource::fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const -{ - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->database()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->table()); - - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->uuid()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->metadataPath()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->isPermanently()); -} -} diff --git a/src/Storages/System/TablesBlockSource.h b/src/Storages/System/TablesBlockSource.h deleted file mode 100644 index 9698fdd494c..00000000000 --- a/src/Storages/System/TablesBlockSource.h +++ /dev/null @@ -1,96 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -class TablesBlockSource : public ISource -{ -public: - TablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = tables_->size(); - tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - tables.insert(tables_->getDataAt(idx).toString()); - } - - String getName() const override { return "Tables"; } - -protected: - Chunk generate() override; - -private: - std::vector columns_mask; - UInt64 max_block_size; - ColumnPtr databases; - NameSet tables; - size_t database_idx = 0; - DatabaseTablesIteratorPtr tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; -}; - -class DetachedTablesBlockSource : public ISource -{ -public: - DetachedTablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr detached_tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = detached_tables_->size(); - detached_tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - { - detached_tables.insert(detached_tables_->getDataAt(idx).toString()); - } - } - - String getName() const override { return "DetachedTables"; } - -protected: - Chunk generate() override; - -private: - const std::vector columns_mask; - const UInt64 max_block_size; - const ColumnPtr databases; - NameSet detached_tables; - DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; - - // temp log for debug - LoggerPtr lg = getLogger("DetachedTablesBlockSource"); - - void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const; -}; -} diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index 2e0165deba2..02734d8bf14 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -24,15 +24,15 @@ def test_system_detached_tables(): node.query("DETACH TABLE test_table") node.query("DETACH TABLE test_table_perm PERMANENTLY") - result = node.query("SELECT name FROM system.detached_tables") + result = node.query("SELECT table FROM system.detached_tables") assert result == "test_table\ntest_table_perm\n" node.restart_clickhouse() - result = node.query("SELECT name FROM system.detached_tables") + result = node.query("SELECT table FROM system.detached_tables") assert result == "test_table_perm\n" node.restart_clickhouse() - result = node.query("SELECT name FROM system.detached_tables") + result = node.query("SELECT table FROM system.detached_tables") assert result == "test_table_perm\n" diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql index 13465d02b80..558dfe4cf18 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.sql +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -7,7 +7,7 @@ CREATE TABLE test_system_detached_tables.test_table (n Int64) ENGINE=MergeTree O SELECT * FROM system.detached_tables; DETACH TABLE test_system_detached_tables.test_table; -SELECT database, name, is_permanently FROM system.detached_tables; +SELECT database, table, is_permanently FROM system.detached_tables; ATTACH TABLE test_system_detached_tables.test_table; @@ -15,12 +15,12 @@ CREATE TABLE test_system_detached_tables.test_table_perm (n Int64) ENGINE=MergeT SELECT * FROM system.detached_tables; DETACH TABLE test_system_detached_tables.test_table_perm PERMANENTLY; -SELECT database, name, is_permanently FROM system.detached_tables; +SELECT database, table, is_permanently FROM system.detached_tables; DETACH TABLE test_system_detached_tables.test_table SYNC; -SELECT database, name, is_permanently FROM system.detached_tables; +SELECT database, table, is_permanently FROM system.detached_tables; -SELECT database, name, is_permanently FROM system.detached_tables WHERE name='test_table'; +SELECT database, table, is_permanently FROM system.detached_tables WHERE table='test_table'; DROP DATABASE test_system_detached_tables; From 00f74905a769d06a451c6472a0f461b25266b3be Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Mon, 24 Jun 2024 09:07:23 +0000 Subject: [PATCH 04/64] small up --- src/Databases/DatabaseOrdinary.cpp | 5 ++++- src/Databases/IDatabase.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 7086f6e628e..3975eb46a4b 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -244,7 +244,10 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables SnapshotDetachedTable snapshot_detached_table; snapshot_detached_table.database = storage_id.getDatabaseName(); snapshot_detached_table.table = detached_table_name; - snapshot_detached_table.uuid = storage_id.uuid; + if (storage_id.hasUUID()) + { + snapshot_detached_table.uuid = storage_id.uuid; + } snapshot_detached_table.is_permanently = true; snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 2ce4650798c..76d25908fda 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -286,7 +286,7 @@ public: virtual DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator( ContextPtr /*context*/, const FilterByNameFunction & /*filter_by_table_name = {}*/, bool /*skip_not_loaded = false*/) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "There is no get detached tables for Database{}", getEngineName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "There is no get detached tables for Database {}", getEngineName()); } /// Returns list of table names. From c7dc99757afec4dbe4ac5b1bcfab037e141839d8 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 26 Jun 2024 15:39:16 +0000 Subject: [PATCH 05/64] apply some comments --- src/Databases/DatabaseOrdinary.cpp | 31 ++++-------- src/Databases/DatabasesCommon.cpp | 48 +++++++------------ src/Databases/IDatabase.h | 4 +- .../test_system_detached_tables/test.py | 22 +++++---- 4 files changed, 42 insertions(+), 63 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 3975eb46a4b..ef0c981ddad 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -227,31 +227,18 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables permanently_detached_tables.push_back(table_name); LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name)); - auto parsed_table_metadata = ParsedTableMetadata{full_path.string(), ast}; - const auto & query = parsed_table_metadata.ast->as(); - std::lock_guard lock(mutex); - auto [detached_table_name, table] = createTableFromAST( - query, - database_name, - getTableDataPath(query), - std::const_pointer_cast(local_context), - LoadingStrictnessLevel::CREATE); + const auto detached_table_name = create_query->getTable(); - const auto storage_id = table->getStorageID(); - - SnapshotDetachedTable snapshot_detached_table; - snapshot_detached_table.database = storage_id.getDatabaseName(); - snapshot_detached_table.table = detached_table_name; - if (storage_id.hasUUID()) - { - snapshot_detached_table.uuid = storage_id.uuid; - } - snapshot_detached_table.is_permanently = true; - snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); - - snapshot_detached_tables.emplace(detached_table_name, std::move(snapshot_detached_table)); + snapshot_detached_tables.emplace( + detached_table_name, + SnapshotDetachedTable{ + .database = create_query->getDatabase(), + .table = detached_table_name, + .uuid = create_query->uuid, + .metadata_path = getObjectMetadataPath(detached_table_name), + .is_permanently = true}); LOG_TRACE(log, "Add permanently detached table {} to system.detached_tables", detached_table_name); return; diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 3b54980597c..470625ddd8b 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -2,6 +2,10 @@ #include #include +#include +#include +#include +#include #include #include #include @@ -13,10 +17,6 @@ #include #include #include -#include -#include -#include -#include namespace DB @@ -245,26 +245,15 @@ DatabaseDetachedTablesSnapshotIteratorPtr DatabaseWithOwnTablesBase::getDetached if (!filter_by_table_name) return std::make_unique(snapshot_detached_tables); - SnapshotDetachedTables filtered_tables; - for (const auto & [table_name, storage] : tables) - if (filter_by_table_name(table_name)) + SnapshotDetachedTables filtered_detached_tables; + for (const auto & [detached_table_name, snapshot] : snapshot_detached_tables) + if (filter_by_table_name(detached_table_name)) { - SnapshotDetachedTable snapshot_detached_table; - snapshot_detached_table.database = storage->getStorageID().getDatabaseName(); - snapshot_detached_table.table = table_name; - if (storage->getStorageID().hasUUID()) - { - snapshot_detached_table.uuid = storage->getStorageID().uuid; - } - - snapshot_detached_table.is_permanently = false; - snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); - - filtered_tables.emplace(table_name, std::move(snapshot_detached_table)); + filtered_detached_tables.emplace(detached_table_name, snapshot); } - return std::make_unique(std::move(filtered_tables)); + return std::make_unique(std::move(filtered_detached_tables)); } bool DatabaseWithOwnTablesBase::empty() const @@ -289,17 +278,14 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n backQuote(database_name), backQuote(table_name)); res = it->second; - SnapshotDetachedTable snapshot_detached_table; - snapshot_detached_table.database = it->second->getStorageID().getDatabaseName(); - snapshot_detached_table.table = it->first; - if (it->second->getStorageID().hasUUID()) - { - snapshot_detached_table.uuid = it->second->getStorageID().uuid; - } - snapshot_detached_table.is_permanently = false; - snapshot_detached_table.metadata_path = getObjectMetadataPath(snapshot_detached_table.table); - - snapshot_detached_tables.emplace(it->first, std::move(snapshot_detached_table)); + snapshot_detached_tables.emplace( + it->first, + SnapshotDetachedTable{ + .database = it->second->getStorageID().getDatabaseName(), + .table = it->first, + .uuid = it->second->getStorageID().uuid, + .metadata_path = getObjectMetadataPath(it->first), + .is_permanently = false}); tables.erase(it); res->is_detached = true; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 76d25908fda..e856dd3d2f0 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -118,7 +118,7 @@ struct SnapshotDetachedTable final String table; UUID uuid = UUIDHelpers::Nil; String metadata_path; - bool is_permanently{false}; + bool is_permanently{}; }; class DatabaseDetachedTablesSnapshotIterator @@ -286,7 +286,7 @@ public: virtual DatabaseDetachedTablesSnapshotIteratorPtr getDetachedTablesIterator( ContextPtr /*context*/, const FilterByNameFunction & /*filter_by_table_name = {}*/, bool /*skip_not_loaded = false*/) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "There is no get detached tables for Database {}", getEngineName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get detached tables for Database{}", getEngineName()); } /// Returns list of table names. diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index 02734d8bf14..bbac5e391f8 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -18,21 +18,27 @@ def test_system_detached_tables(): node.query("CREATE TABLE test_table (n Int64) ENGINE=MergeTree ORDER BY n;") node.query("CREATE TABLE test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n;") - result = node.query("SELECT * FROM system.detached_tables") - assert result == "" + test_table_uuid = node.query("SELECT uuid FROM system.tables WHERE table='test_table'").rstrip('\n') + test_table_metadata_path = node.query("SELECT metadata_path FROM system.tables WHERE table='test_table'").rstrip('\n') + + test_table_perm_uuid = node.query("SELECT uuid FROM system.tables WHERE table='test_table_perm'").rstrip('\n') + test_table_perm_metadata_path = node.query("SELECT metadata_path FROM system.tables WHERE table='test_table_perm'").rstrip('\n') + + assert "" == node.query("SELECT * FROM system.detached_tables") node.query("DETACH TABLE test_table") node.query("DETACH TABLE test_table_perm PERMANENTLY") - result = node.query("SELECT table FROM system.detached_tables") - assert result == "test_table\ntest_table_perm\n" + querry = "SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables FORMAT Values" + result = node.query(querry) + assert result == f"('default','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" node.restart_clickhouse() - result = node.query("SELECT table FROM system.detached_tables") - assert result == "test_table_perm\n" + result = node.query(querry) + assert result == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" node.restart_clickhouse() - result = node.query("SELECT table FROM system.detached_tables") - assert result == "test_table_perm\n" + result = node.query(querry) + assert result == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" From de8f1fc6e3adb6c8f346b2fda4e34c2240e1bf8e Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 07:32:56 +0000 Subject: [PATCH 06/64] general logic for tables --- src/Storages/System/ReadFromSystemTables.cpp | 157 ++++ src/Storages/System/ReadFromSystemTables.h | 66 ++ .../System/StorageSystemDetachedTables.cpp | 237 +------ src/Storages/System/StorageSystemTables.cpp | 669 +----------------- src/Storages/System/TablesBlockSource.cpp | 564 +++++++++++++++ src/Storages/System/TablesBlockSource.h | 92 +++ 6 files changed, 894 insertions(+), 891 deletions(-) create mode 100644 src/Storages/System/ReadFromSystemTables.cpp create mode 100644 src/Storages/System/ReadFromSystemTables.h create mode 100644 src/Storages/System/TablesBlockSource.cpp create mode 100644 src/Storages/System/TablesBlockSource.h diff --git a/src/Storages/System/ReadFromSystemTables.cpp b/src/Storages/System/ReadFromSystemTables.cpp new file mode 100644 index 00000000000..638cab1b63b --- /dev/null +++ b/src/Storages/System/ReadFromSystemTables.cpp @@ -0,0 +1,157 @@ +#include "ReadFromSystemTables.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables( + const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, const bool need_detached_tables) +{ + Block sample{ + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + if (need_detached_tables) + { + auto table_it = database->getDetachedTablesIterator(context, {}, false); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->table()); + } + } + else + { + auto table_it = database->getTablesIterator(context); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } + } + } + + Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +} + +ReadFromSystemTables::ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) +{ +} + +void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + applyFiltersImpl(std::move(added_filter_nodes), false); +} + +void ReadFromSystemTables::applyFiltersImpl(ActionDAGNodes added_filter_nodes, const bool need_detached_tables_) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context, need_detached_tables_); +} + +void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + pipeline.init(createPipe()); +} + +ReadFromSystemDetachedTables::ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + const size_t max_block_size_) + : ReadFromSystemTables( + column_names_, query_info_, storage_snapshot_, context_, std::move(sample_block), std::move(columns_mask_), max_block_size_) +{ +} + +void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + applyFiltersImpl(std::move(added_filter_nodes), true); +} + +void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + pipeline.init(createPipe()); +} +} diff --git a/src/Storages/System/ReadFromSystemTables.h b/src/Storages/System/ReadFromSystemTables.h new file mode 100644 index 00000000000..f8f34363159 --- /dev/null +++ b/src/Storages/System/ReadFromSystemTables.h @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ReadFromSystemTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +protected: + void applyFiltersImpl(ActionDAGNodes added_filter_nodes, bool need_detached_tables_); + + template + Pipe createPipe() + { + return Pipe(std::make_shared( + std::move(columns_mask), + getOutputStream().header, + max_block_size, + std::move(filtered_databases_column), + std::move(filtered_tables_column), + context)); + } + +private: + std::vector columns_mask; + size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; +}; + +class ReadFromSystemDetachedTables : public ReadFromSystemTables +{ +public: + std::string getName() const override { return "ReadFromSystemDetachedTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; +}; +} diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp index 9805fa2a8cf..99c15fdae0e 100644 --- a/src/Storages/System/StorageSystemDetachedTables.cpp +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -22,204 +23,6 @@ namespace DB { -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) -{ - Block sample{ - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - auto table_it = database->getDetachedTablesIterator(context, {}, false); - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->table()); - } - } - - Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -class DetachedTablesBlockSource : public ISource -{ -public: - DetachedTablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr detached_tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = detached_tables_->size(); - detached_tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - { - detached_tables.insert(detached_tables_->getDataAt(idx).toString()); - } - } - - String getName() const override { return "DetachedTables"; } - -protected: - Chunk generate() override - { - if (done) - return {}; - - MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - continue; - - const bool need_to_check_access_for_tables - = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!detached_tables_it || !detached_tables_it->isValid()) - detached_tables_it = database->getDetachedTablesIterator(context, {}, false); - - for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) - { - const auto detached_table_name = detached_tables_it->table(); - - if (!detached_tables.contains(detached_table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) - continue; - - fillResultColumnsByDetachedTableIterator(result_columns); - ++rows_count; - } - } - - const UInt64 num_rows = result_columns.at(0)->size(); - done = true; - return Chunk(std::move(result_columns), num_rows); - } - -private: - const std::vector columns_mask; - const UInt64 max_block_size; - const ColumnPtr databases; - NameSet detached_tables; - DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; - - void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const - { - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->database()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->table()); - - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->uuid()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->metadataPath()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->isPermanently()); - } -}; - -} - -class ReadFromSystemDetachedTables : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromSystemDetachedTables"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemDetachedTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_); - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - -private: - std::vector columns_mask; - size_t max_block_size; - - ColumnPtr filtered_databases_column; - ColumnPtr filtered_tables_column; -}; - StorageSystemDetachedTables::StorageSystemDetachedTables(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -257,42 +60,4 @@ void StorageSystemDetachedTables::read( query_plan.addStep(std::move(reading)); } - -ReadFromSystemDetachedTables::ReadFromSystemDetachedTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) -{ -} - -void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); -} - -void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - auto pipe = Pipe(std::make_shared( - std::move(columns_mask), - getOutputStream().header, - max_block_size, - std::move(filtered_databases_column), - std::move(filtered_tables_column), - context)); - pipeline.init(std::move(pipe)); -} } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 783b899c978..40190f53fd3 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -1,29 +1,30 @@ +#include #include -#include +#include +#include +#include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include #include #include #include #include #include -#include +#include +#include +#include +#include +#include +#include #include @@ -105,628 +106,6 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) setInMemoryMetadata(storage_metadata); } - -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) -{ - Block sample { - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine") - }; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } - } - - Block block {ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -/// Avoid heavy operation on tables if we only queried columns that we can get without table object. -/// Otherwise it will require table initialization for Lazy database. -bool needTable(const DatabasePtr & database, const Block & header) -{ - if (database->getEngineName() != "Lazy") - return true; - - static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; - for (const auto & column : header.getColumnsWithTypeAndName()) - { - if (columns_without_table.find(column.name) == columns_without_table.end()) - return true; - } - return false; -} - - -class TablesBlockSource : public ISource -{ -public: - TablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = tables_->size(); - tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - tables.insert(tables_->getDataAt(idx).toString()); - } - - String getName() const override { return "Tables"; } - -protected: - Chunk generate() override - { - if (done) - return {}; - - MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - size_t rows_count = 0; - while (rows_count < max_block_size) - { - if (tables_it && !tables_it->isValid()) - ++database_idx; - - while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - { - /// Database was deleted just now or the user has no access. - ++database_idx; - continue; - } - - break; - } - - /// This is for temporary tables. They are output in single block regardless to max_block_size. - if (database_idx >= databases->size()) - { - if (context->hasSessionContext()) - { - Tables external_tables = context->getSessionContext()->getExternalTables(); - - for (auto & table : external_tables) - { - size_t src_index = 0; - size_t res_index = 0; - - // database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // name - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.first); - - // uuid - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getStorageID().uuid); - - // engine - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - // is_temporary - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(1u); - - // data_paths - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_path - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_modification_time - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_version - // Temporary tables does not support replication - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_table - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // create_table_query - if (columns_mask[src_index++]) - { - auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); - ASTPtr ast = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - } - - // engine_full - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - const auto & settings = context->getSettingsRef(); - while (src_index < columns_mask.size()) - { - // total_rows - if (src_index == 19 && columns_mask[src_index]) - { - if (auto total_rows = table.second->totalRows(settings)) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - // total_bytes - else if (src_index == 20 && columns_mask[src_index]) - { - if (auto total_bytes = table.second->totalBytes(settings)) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - /// Fill the rest columns with defaults - else if (columns_mask[src_index]) - res_columns[res_index++]->insertDefault(); - src_index++; - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - done = true; - return Chunk(std::move(res_columns), num_rows); - } - - const bool need_to_check_access_for_tables = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!tables_it || !tables_it->isValid()) - tables_it = database->getTablesIterator(context); - - const bool need_table = needTable(database, getPort().getHeader()); - - for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) - { - auto table_name = tables_it->name(); - if (!tables.contains(table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) - continue; - - StoragePtr table = nullptr; - TableLockHolder lock; - if (need_table) - { - table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) - { - lock = table->tryLockForShare(context->getCurrentQueryId(), - context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table - continue; - } - } - ++rows_count; - - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(tables_it->uuid()); - - if (columns_mask[src_index++]) - { - chassert(table != nullptr); - res_columns[res_index++]->insert(table->getName()); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(0u); // is_temporary - - if (columns_mask[src_index++]) - { - chassert(lock != nullptr); - Array table_paths_array; - auto paths = table->getDataPaths(); - table_paths_array.reserve(paths.size()); - for (const String & path : paths) - table_paths_array.push_back(path); - res_columns[res_index++]->insert(table_paths_array); - /// We don't need the lock anymore - lock = nullptr; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); - - StorageMetadataPtr metadata_snapshot; - if (table) - metadata_snapshot = table->getInMemoryMetadataPtr(); - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && table->supportsReplication()) - res_columns[res_index++]->insert(metadata_snapshot->metadata_version); - else - res_columns[res_index++]->insertDefault(); - } - - { - Array views_table_name_array; - Array views_database_name_array; - if (columns_mask[src_index] || columns_mask[src_index + 1]) - { - const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); - - views_table_name_array.reserve(view_ids.size()); - views_database_name_array.reserve(view_ids.size()); - for (const auto & view_id : view_ids) - { - views_table_name_array.push_back(view_id.table_name); - views_database_name_array.push_back(view_id.database_name); - } - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_database_name_array); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_table_name_array); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) - { - ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); - auto * ast_create = ast ? ast->as() : nullptr; - - if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) - { - ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - - if (columns_mask[src_index++]) - { - String engine_full; - - if (ast_create && ast_create->storage) - { - engine_full = format({context, *ast_create->storage}); - - static const char * const extra_head = " ENGINE = "; - if (startsWith(engine_full, extra_head)) - engine_full = engine_full.substr(strlen(extra_head)); - } - - res_columns[res_index++]->insert(engine_full); - } - - if (columns_mask[src_index++]) - { - String as_select; - if (ast_create && ast_create->select) - as_select = format({context, *ast_create->select}); - res_columns[res_index++]->insert(as_select); - } - } - else - src_index += 3; - - ASTPtr expression_ptr; - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto policy = table ? table->getStoragePolicy() : nullptr; - if (policy) - res_columns[res_index++]->insert(policy->getName()); - else - res_columns[res_index++]->insertDefault(); - } - - auto settings = context->getSettingsRef(); - settings.select_sequential_consistency = 0; - if (columns_mask[src_index++]) - { - auto total_rows = table ? table->totalRows(settings) : std::nullopt; - if (total_rows) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes = table->totalBytes(settings); - if (total_bytes) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); - if (total_bytes_uncompressed) - res_columns[res_index++]->insert(*total_bytes_uncompressed); - else - res_columns[res_index++]->insertDefault(); - } - - auto table_merge_tree = std::dynamic_pointer_cast(table); - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - { - res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); - } - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; - if (lifetime_rows) - res_columns[res_index++]->insert(*lifetime_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; - if (lifetime_bytes) - res_columns[res_index++]->insert(*lifetime_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot) - res_columns[res_index++]->insert(metadata_snapshot->comment); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table) - res_columns[res_index++]->insert(table->storesDataOnDisk()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) - { - auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); - auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); - - Array dependencies_databases; - Array dependencies_tables; - dependencies_databases.reserve(dependencies.size()); - dependencies_tables.reserve(dependencies.size()); - for (const auto & dependency : dependencies) - { - dependencies_databases.push_back(dependency.database_name); - dependencies_tables.push_back(dependency.table_name); - } - - Array dependents_databases; - Array dependents_tables; - dependents_databases.reserve(dependents.size()); - dependents_tables.reserve(dependents.size()); - for (const auto & dependent : dependents) - { - dependents_databases.push_back(dependent.database_name); - dependents_tables.push_back(dependent.table_name); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_tables); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_tables); - - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - return Chunk(std::move(res_columns), num_rows); - } -private: - std::vector columns_mask; - UInt64 max_block_size; - ColumnPtr databases; - NameSet tables; - size_t database_idx = 0; - DatabaseTablesIteratorPtr tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; -}; - -} - -class ReadFromSystemTables : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromSystemTables"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_) - : SourceStepWithFilter( - DataStream{.header = std::move(sample_block)}, - column_names_, - query_info_, - storage_snapshot_, - context_) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - { - } - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - -private: - std::vector columns_mask; - size_t max_block_size; - - ColumnPtr filtered_databases_column; - ColumnPtr filtered_tables_column; -}; - void StorageSystemTables::read( QueryPlan & query_plan, const Names & column_names, @@ -747,24 +126,4 @@ void StorageSystemTables::read( query_plan.addStep(std::move(reading)); } - -void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); -} - -void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - Pipe pipe(std::make_shared( - std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context)); - pipeline.init(std::move(pipe)); -} - } diff --git a/src/Storages/System/TablesBlockSource.cpp b/src/Storages/System/TablesBlockSource.cpp new file mode 100644 index 00000000000..d5481856f1b --- /dev/null +++ b/src/Storages/System/TablesBlockSource.cpp @@ -0,0 +1,564 @@ +#include "TablesBlockSource.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace +{ + +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = {"database", "name", "uuid", "metadata_modification_time"}; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + +} + +Chunk TablesBlockSource::generate() +{ + if (done) + return {}; + + MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + size_t rows_count = 0; + while (rows_count < max_block_size) + { + if (tables_it && !tables_it->isValid()) + ++database_idx; + + while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + { + /// Database was deleted just now or the user has no access. + ++database_idx; + continue; + } + + break; + } + + /// This is for temporary tables. They are output in single block regardless to max_block_size. + if (database_idx >= databases->size()) + { + if (context->hasSessionContext()) + { + Tables external_tables = context->getSessionContext()->getExternalTables(); + + for (auto & table : external_tables) + { + size_t src_index = 0; + size_t res_index = 0; + + // database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // name + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.first); + + // uuid + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getStorageID().uuid); + + // engine + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + // is_temporary + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(1u); + + // data_paths + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_path + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_modification_time + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_version + // Temporary tables does not support replication + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_table + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // create_table_query + if (columns_mask[src_index++]) + { + auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); + ASTPtr ast + = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + } + + // engine_full + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + const auto & settings = context->getSettingsRef(); + while (src_index < columns_mask.size()) + { + // total_rows + if (src_index == 19 && columns_mask[src_index]) + { + if (auto total_rows = table.second->totalRows(settings)) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + // total_bytes + else if (src_index == 20 && columns_mask[src_index]) + { + if (auto total_bytes = table.second->totalBytes(settings)) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + /// Fill the rest columns with defaults + else if (columns_mask[src_index]) + res_columns[res_index++]->insertDefault(); + src_index++; + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + done = true; + return Chunk(std::move(res_columns), num_rows); + } + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!tables_it || !tables_it->isValid()) + tables_it = database->getTablesIterator(context); + + const bool need_table = needTable(database, getPort().getHeader()); + + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) + { + auto table_name = tables_it->name(); + if (!tables.contains(table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + StoragePtr table = nullptr; + TableLockHolder lock; + if (need_table) + { + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) + continue; + + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } + ++rows_count; + + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(tables_it->uuid()); + + if (columns_mask[src_index++]) + { + chassert(table != nullptr); + res_columns[res_index++]->insert(table->getName()); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(0u); // is_temporary + + if (columns_mask[src_index++]) + { + chassert(lock != nullptr); + Array table_paths_array; + auto paths = table->getDataPaths(); + table_paths_array.reserve(paths.size()); + for (const String & path : paths) + table_paths_array.push_back(path); + res_columns[res_index++]->insert(table_paths_array); + /// We don't need the lock anymore + lock = nullptr; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); + + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && table->supportsReplication()) + res_columns[res_index++]->insert(metadata_snapshot->metadata_version); + else + res_columns[res_index++]->insertDefault(); + } + + { + Array views_table_name_array; + Array views_database_name_array; + if (columns_mask[src_index] || columns_mask[src_index + 1]) + { + const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); + + views_table_name_array.reserve(view_ids.size()); + views_database_name_array.reserve(view_ids.size()); + for (const auto & view_id : view_ids) + { + views_table_name_array.push_back(view_id.table_name); + views_database_name_array.push_back(view_id.database_name); + } + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_database_name_array); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_table_name_array); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) + { + ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); + auto * ast_create = ast ? ast->as() : nullptr; + + if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) + { + ast_create->uuid = UUIDHelpers::Nil; + ast_create->to_inner_uuid = UUIDHelpers::Nil; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + + if (columns_mask[src_index++]) + { + String engine_full; + + if (ast_create && ast_create->storage) + { + engine_full = format({context, *ast_create->storage}); + + static const char * const extra_head = " ENGINE = "; + if (startsWith(engine_full, extra_head)) + engine_full = engine_full.substr(strlen(extra_head)); + } + + res_columns[res_index++]->insert(engine_full); + } + + if (columns_mask[src_index++]) + { + String as_select; + if (ast_create && ast_create->select) + as_select = format({context, *ast_create->select}); + res_columns[res_index++]->insert(as_select); + } + } + else + src_index += 3; + + ASTPtr expression_ptr; + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto policy = table ? table->getStoragePolicy() : nullptr; + if (policy) + res_columns[res_index++]->insert(policy->getName()); + else + res_columns[res_index++]->insertDefault(); + } + + auto settings = context->getSettingsRef(); + settings.select_sequential_consistency = 0; + if (columns_mask[src_index++]) + { + auto total_rows = table ? table->totalRows(settings) : std::nullopt; + if (total_rows) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes = table->totalBytes(settings); + if (total_bytes) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); + if (total_bytes_uncompressed) + res_columns[res_index++]->insert(*total_bytes_uncompressed); + else + res_columns[res_index++]->insertDefault(); + } + + auto table_merge_tree = std::dynamic_pointer_cast(table); + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + { + res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); + } + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; + if (lifetime_rows) + res_columns[res_index++]->insert(*lifetime_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; + if (lifetime_bytes) + res_columns[res_index++]->insert(*lifetime_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot) + res_columns[res_index++]->insert(metadata_snapshot->comment); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table) + res_columns[res_index++]->insert(table->storesDataOnDisk()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) + { + auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); + auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); + + Array dependencies_databases; + Array dependencies_tables; + dependencies_databases.reserve(dependencies.size()); + dependencies_tables.reserve(dependencies.size()); + for (const auto & dependency : dependencies) + { + dependencies_databases.push_back(dependency.database_name); + dependencies_tables.push_back(dependency.table_name); + } + + Array dependents_databases; + Array dependents_tables; + dependents_databases.reserve(dependents.size()); + dependents_tables.reserve(dependents.size()); + for (const auto & dependent : dependents) + { + dependents_databases.push_back(dependent.database_name); + dependents_tables.push_back(dependent.table_name); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_tables); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_tables); + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + return Chunk(std::move(res_columns), num_rows); +} + +Chunk DetachedTablesBlockSource::generate() +{ + if (done) + return {}; + + MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + continue; + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!detached_tables_it || !detached_tables_it->isValid()) + detached_tables_it = database->getDetachedTablesIterator(context, {}, false); + + for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) + { + const auto detached_table_name = detached_tables_it->table(); + + if (!detached_tables.contains(detached_table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) + continue; + + fillResultColumnsByDetachedTableIterator(result_columns); + ++rows_count; + } + } + + const UInt64 num_rows = result_columns.at(0)->size(); + done = true; + return Chunk(std::move(result_columns), num_rows); +} + +void DetachedTablesBlockSource::fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const +{ + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->database()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->table()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->uuid()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->metadataPath()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->isPermanently()); +} +} diff --git a/src/Storages/System/TablesBlockSource.h b/src/Storages/System/TablesBlockSource.h new file mode 100644 index 00000000000..a9f2226dca8 --- /dev/null +++ b/src/Storages/System/TablesBlockSource.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +class TablesBlockSource : public ISource +{ +public: + TablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = tables_->size(); + tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + tables.insert(tables_->getDataAt(idx).toString()); + } + + String getName() const override { return "Tables"; } + +protected: + Chunk generate() override; + +private: + std::vector columns_mask; + UInt64 max_block_size; + ColumnPtr databases; + NameSet tables; + size_t database_idx = 0; + DatabaseTablesIteratorPtr tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; +}; + +class DetachedTablesBlockSource : public ISource +{ +public: + DetachedTablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr detached_tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = detached_tables_->size(); + detached_tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + { + detached_tables.insert(detached_tables_->getDataAt(idx).toString()); + } + } + + String getName() const override { return "DetachedTables"; } + +protected: + Chunk generate() override; + +private: + const std::vector columns_mask; + const UInt64 max_block_size; + const ColumnPtr databases; + NameSet detached_tables; + DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; + + void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const; +}; +} From e06387e6da4a89d9aab145dd057b6a99348e3ee8 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 07:37:23 +0000 Subject: [PATCH 07/64] fix stle --- .../test_system_detached_tables/test.py | 33 +++++++++++++------ 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index bbac5e391f8..45bc70147b6 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -18,11 +18,19 @@ def test_system_detached_tables(): node.query("CREATE TABLE test_table (n Int64) ENGINE=MergeTree ORDER BY n;") node.query("CREATE TABLE test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n;") - test_table_uuid = node.query("SELECT uuid FROM system.tables WHERE table='test_table'").rstrip('\n') - test_table_metadata_path = node.query("SELECT metadata_path FROM system.tables WHERE table='test_table'").rstrip('\n') + test_table_uuid = node.query( + "SELECT uuid FROM system.tables WHERE table='test_table'" + ).rstrip('\n') + test_table_metadata_path = node.query( + "SELECT metadata_path FROM system.tables WHERE table='test_table'" + ).rstrip('\n') - test_table_perm_uuid = node.query("SELECT uuid FROM system.tables WHERE table='test_table_perm'").rstrip('\n') - test_table_perm_metadata_path = node.query("SELECT metadata_path FROM system.tables WHERE table='test_table_perm'").rstrip('\n') + test_table_perm_uuid = node.query( + "SELECT uuid FROM system.tables WHERE table='test_table_perm'" + ).rstrip('\n') + test_table_perm_metadata_path = node.query( + "SELECT metadata_path FROM system.tables WHERE table='test_table_perm'" + ).rstrip('\n') assert "" == node.query("SELECT * FROM system.detached_tables") @@ -31,12 +39,17 @@ def test_system_detached_tables(): querry = "SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables FORMAT Values" result = node.query(querry) - assert result == f"('default','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" - - node.restart_clickhouse() - - result = node.query(querry) - assert result == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + assert ( + result + == f"('default','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + ) + node.restart_clickhouse() + + result = node.query(querry) + assert ( + result + == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + ) node.restart_clickhouse() From 1c9ccbae1142d585f7534441a6abc5e75acfdce3 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 07:49:07 +0000 Subject: [PATCH 08/64] small up --- src/Databases/DatabasesCommon.cpp | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 470625ddd8b..4cf914fea56 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -270,37 +270,36 @@ StoragePtr DatabaseWithOwnTablesBase::detachTable(ContextPtr /* context_ */, con StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_name) { - StoragePtr res; - auto it = tables.find(table_name); if (it == tables.end()) throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuote(database_name), backQuote(table_name)); - res = it->second; + + auto table_storage = it->second; snapshot_detached_tables.emplace( - it->first, + table_name, SnapshotDetachedTable{ .database = it->second->getStorageID().getDatabaseName(), - .table = it->first, + .table = table_name, .uuid = it->second->getStorageID().uuid, - .metadata_path = getObjectMetadataPath(it->first), + .metadata_path = getObjectMetadataPath(table_name), .is_permanently = false}); tables.erase(it); - res->is_detached = true; + table_storage->is_detached = true; - if (res->isSystemStorage() == false) - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); + if (table_storage->isSystemStorage() == false) + CurrentMetrics::sub(getAttachedCounterForStorage(table_storage), 1); - auto table_id = res->getStorageID(); + auto table_id = table_storage->getStorageID(); if (table_id.hasUUID()) { assert(database_name == DatabaseCatalog::TEMPORARY_DATABASE || getUUID() != UUIDHelpers::Nil); DatabaseCatalog::instance().removeUUIDMapping(table_id.uuid); } - return res; + return table_storage; } void DatabaseWithOwnTablesBase::attachTable(ContextPtr /* context_ */, const String & table_name, const StoragePtr & table, const String &) From 215802e74eb841449af6cb80f25b66a3610707dd Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 08:28:35 +0000 Subject: [PATCH 09/64] fix style --- .../test_system_detached_tables/test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index 45bc70147b6..dbccac92523 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -20,17 +20,17 @@ def test_system_detached_tables(): test_table_uuid = node.query( "SELECT uuid FROM system.tables WHERE table='test_table'" - ).rstrip('\n') + ).rstrip("\n") test_table_metadata_path = node.query( "SELECT metadata_path FROM system.tables WHERE table='test_table'" - ).rstrip('\n') + ).rstrip("\n") test_table_perm_uuid = node.query( "SELECT uuid FROM system.tables WHERE table='test_table_perm'" - ).rstrip('\n') + ).rstrip("\n") test_table_perm_metadata_path = node.query( "SELECT metadata_path FROM system.tables WHERE table='test_table_perm'" - ).rstrip('\n') + ).rstrip("\n") assert "" == node.query("SELECT * FROM system.detached_tables") @@ -40,7 +40,7 @@ def test_system_detached_tables(): querry = "SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables FORMAT Values" result = node.query(querry) assert ( - result + result == f"('default','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" ) node.restart_clickhouse() @@ -54,4 +54,7 @@ def test_system_detached_tables(): node.restart_clickhouse() result = node.query(querry) - assert result == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + assert ( + result + == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + ) From bda41ab03d650978cb6765746d786b25de9ed72c Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 10:52:04 +0000 Subject: [PATCH 10/64] fix tests for CI --- src/Databases/DatabasesCommon.cpp | 8 ++++---- src/Databases/IDatabase.h | 4 +++- src/Databases/TablesLoader.cpp | 10 +++++----- .../0_stateless/03172_system_detached_tables.sql | 12 ++++++------ 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 4cf914fea56..fe0baf30e57 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -2,10 +2,6 @@ #include #include -#include -#include -#include -#include #include #include #include @@ -17,6 +13,10 @@ #include #include #include +#include +#include +#include +#include namespace DB diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index e856dd3d2f0..ce5a52b1b0f 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -138,7 +138,9 @@ protected: } public: - explicit DatabaseDetachedTablesSnapshotIterator(const SnapshotDetachedTables & tables_) : snapshot(tables_), it(snapshot.begin()) { } + explicit DatabaseDetachedTablesSnapshotIterator(const SnapshotDetachedTables & tables_) : snapshot(tables_), it(snapshot.begin()) + { + } explicit DatabaseDetachedTablesSnapshotIterator(SnapshotDetachedTables && tables_) : snapshot(std::move(tables_)), it(snapshot.begin()) { diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 6aa13b7b759..1f70f46da60 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -1,14 +1,14 @@ -#include -#include +#include #include #include -#include +#include +#include #include +#include #include #include -#include #include -#include +#include namespace DB diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql index 558dfe4cf18..e361d2f34ca 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.sql +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -4,23 +4,23 @@ DROP DATABASE IF EXISTS test_system_detached_tables; CREATE DATABASE IF NOT EXISTS test_system_detached_tables; CREATE TABLE test_system_detached_tables.test_table (n Int64) ENGINE=MergeTree ORDER BY n; -SELECT * FROM system.detached_tables; +SELECT * FROM system.detached_tables WHERE database='test_system_detached_tables'; DETACH TABLE test_system_detached_tables.test_table; -SELECT database, table, is_permanently FROM system.detached_tables; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables'; ATTACH TABLE test_system_detached_tables.test_table; CREATE TABLE test_system_detached_tables.test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n; -SELECT * FROM system.detached_tables; +SELECT * FROM system.detached_tables WHERE database='test_system_detached_tables'; DETACH TABLE test_system_detached_tables.test_table_perm PERMANENTLY; -SELECT database, table, is_permanently FROM system.detached_tables; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables'; DETACH TABLE test_system_detached_tables.test_table SYNC; -SELECT database, table, is_permanently FROM system.detached_tables; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables'; -SELECT database, table, is_permanently FROM system.detached_tables WHERE table='test_table'; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables' AND table='test_table'; DROP DATABASE test_system_detached_tables; From 6d1f37ad3db0078514dd76e857ef10474afd2ed4 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 15:37:27 +0000 Subject: [PATCH 11/64] support lazy db --- src/Databases/DatabaseLazy.cpp | 16 ++++++ .../03172_system_detached_tables.reference | 16 ++++-- .../03172_system_detached_tables.sql | 55 ++++++++++++++----- 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 233db07cd68..3c398e50371 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -187,6 +187,13 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); + LOG_DEBUG(log, "Add info for detached table {} to snapshot.", backQuote(table_name)); + if (snapshot_detached_tables.contains(table_name)) + { + LOG_DEBUG(log, "Clean info about detached table {} from snapshot.", backQuote(table_name)); + snapshot_detached_tables.erase(table_name); + } + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); } @@ -203,6 +210,15 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); + LOG_DEBUG(log, "Add info for detached table {} to snapshot.", backQuote(table_name)); + snapshot_detached_tables.emplace( + table_name, + SnapshotDetachedTable{ + .database = res->getStorageID().database_name, + .table = res->getStorageID().table_name, + .uuid = res->getStorageID().uuid, + .metadata_path = getObjectMetadataPath(table_name), + .is_permanently = false}); CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } diff --git a/tests/queries/0_stateless/03172_system_detached_tables.reference b/tests/queries/0_stateless/03172_system_detached_tables.reference index 1cf9e0275c9..ce19daa4cdc 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.reference +++ b/tests/queries/0_stateless/03172_system_detached_tables.reference @@ -1,5 +1,11 @@ -test_system_detached_tables test_table 0 -test_system_detached_tables test_table_perm 1 -test_system_detached_tables test_table 0 -test_system_detached_tables test_table_perm 1 -test_system_detached_tables test_table 0 +database atomic tests +test03172_system_detached_tables test_table 0 +test03172_system_detached_tables test_table_perm 1 +test03172_system_detached_tables test_table 0 +test03172_system_detached_tables test_table_perm 1 +test03172_system_detached_tables test_table 0 +----------------------- +database lazy tests +test03172_system_detached_tables_lazy test_table 0 +test03172_system_detached_tables_lazy test_table_perm 1 +DROP TABLE diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql index e361d2f34ca..916821d996b 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.sql +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -1,28 +1,53 @@ -- Tags: no-parallel -DROP DATABASE IF EXISTS test_system_detached_tables; -CREATE DATABASE IF NOT EXISTS test_system_detached_tables; +SELECT 'database atomic tests'; +DROP DATABASE IF EXISTS test03172_system_detached_tables; +CREATE DATABASE IF NOT EXISTS test03172_system_detached_tables; -CREATE TABLE test_system_detached_tables.test_table (n Int64) ENGINE=MergeTree ORDER BY n; -SELECT * FROM system.detached_tables WHERE database='test_system_detached_tables'; +CREATE TABLE test03172_system_detached_tables.test_table (n Int64) ENGINE=MergeTree ORDER BY n; +SELECT * FROM system.detached_tables WHERE database='test03172_system_detached_tables'; -DETACH TABLE test_system_detached_tables.test_table; -SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables'; +DETACH TABLE test03172_system_detached_tables.test_table; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables'; -ATTACH TABLE test_system_detached_tables.test_table; +ATTACH TABLE test03172_system_detached_tables.test_table; -CREATE TABLE test_system_detached_tables.test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n; -SELECT * FROM system.detached_tables WHERE database='test_system_detached_tables'; +CREATE TABLE test03172_system_detached_tables.test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n; +SELECT * FROM system.detached_tables WHERE database='test03172_system_detached_tables'; -DETACH TABLE test_system_detached_tables.test_table_perm PERMANENTLY; -SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables'; +DETACH TABLE test03172_system_detached_tables.test_table_perm PERMANENTLY; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables'; -DETACH TABLE test_system_detached_tables.test_table SYNC; -SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables'; +DETACH TABLE test03172_system_detached_tables.test_table SYNC; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables'; -SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test_system_detached_tables' AND table='test_table'; +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables' AND table='test_table'; -DROP DATABASE test_system_detached_tables; +DROP DATABASE test03172_system_detached_tables SYNC; +SELECT '-----------------------'; +SELECT 'database lazy tests'; +DROP DATABASE IF EXISTS test03172_system_detached_tables_lazy; +CREATE DATABASE test03172_system_detached_tables_lazy Engine=Lazy(10); +CREATE TABLE test03172_system_detached_tables_lazy.test_table (number UInt64) engine=Log; +INSERT INTO test03172_system_detached_tables_lazy.test_table SELECT * FROM numbers(100); +DETACH TABLE test03172_system_detached_tables_lazy.test_table; + +CREATE TABLE test03172_system_detached_tables_lazy.test_table_perm (number UInt64) engine=Log; +INSERT INTO test03172_system_detached_tables_lazy.test_table_perm SELECT * FROM numbers(100); +DETACH table test03172_system_detached_tables_lazy.test_table_perm PERMANENTLY; + +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; + +ATTACH TABLE test03172_system_detached_tables_lazy.test_table; +ATTACH TABLE test03172_system_detached_tables_lazy.test_table_perm; + +SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; + +SELECT 'DROP TABLE'; +DROP TABLE test03172_system_detached_tables_lazy.test_table SYNC; +DROP TABLE test03172_system_detached_tables_lazy.test_table_perm SYNC; + +DROP DATABASE test03172_system_detached_tables_lazy SYNC; From 062198b95642a52ec0c67bbc25e7d5da45814b33 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 27 Jun 2024 15:44:37 +0000 Subject: [PATCH 12/64] add checking and exception --- src/Databases/DatabaseOnDisk.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 25e5347e017..67c9114e793 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -22,6 +22,7 @@ #include #include #include +#include "Common/Exception.h" #include #include #include @@ -309,7 +310,11 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri FS::createFile(detached_permanently_flag); std::lock_guard lock(mutex); - snapshot_detached_tables.at(table_name).is_permanently = true; + if (!snapshot_detached_tables.contains(table_name)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); + } + snapshot_detached_tables[table_name].is_permanently = true; } catch (Exception & e) { From 818e8597a22cf87c560d7986818f8a094cd7db1f Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 28 Jun 2024 12:27:40 +0000 Subject: [PATCH 13/64] Revert "general logic for tables" This reverts commit de8f1fc6e3adb6c8f346b2fda4e34c2240e1bf8e. --- src/Storages/System/ReadFromSystemTables.cpp | 157 ---- src/Storages/System/ReadFromSystemTables.h | 66 -- .../System/StorageSystemDetachedTables.cpp | 237 ++++++- src/Storages/System/StorageSystemTables.cpp | 669 +++++++++++++++++- src/Storages/System/TablesBlockSource.cpp | 564 --------------- src/Storages/System/TablesBlockSource.h | 92 --- 6 files changed, 891 insertions(+), 894 deletions(-) delete mode 100644 src/Storages/System/ReadFromSystemTables.cpp delete mode 100644 src/Storages/System/ReadFromSystemTables.h delete mode 100644 src/Storages/System/TablesBlockSource.cpp delete mode 100644 src/Storages/System/TablesBlockSource.h diff --git a/src/Storages/System/ReadFromSystemTables.cpp b/src/Storages/System/ReadFromSystemTables.cpp deleted file mode 100644 index 638cab1b63b..00000000000 --- a/src/Storages/System/ReadFromSystemTables.cpp +++ /dev/null @@ -1,157 +0,0 @@ -#include "ReadFromSystemTables.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ - -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables( - const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, const bool need_detached_tables) -{ - Block sample{ - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - if (need_detached_tables) - { - auto table_it = database->getDetachedTablesIterator(context, {}, false); - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->table()); - } - } - else - { - auto table_it = database->getTablesIterator(context); - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } - } - } - - Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -} - -ReadFromSystemTables::ReadFromSystemTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_) - : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) -{ -} - -void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) -{ - applyFiltersImpl(std::move(added_filter_nodes), false); -} - -void ReadFromSystemTables::applyFiltersImpl(ActionDAGNodes added_filter_nodes, const bool need_detached_tables_) -{ - SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); - - const ActionsDAG::Node * predicate = nullptr; - if (filter_actions_dag) - predicate = filter_actions_dag->getOutputs().at(0); - - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context, need_detached_tables_); -} - -void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.init(createPipe()); -} - -ReadFromSystemDetachedTables::ReadFromSystemDetachedTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - const size_t max_block_size_) - : ReadFromSystemTables( - column_names_, query_info_, storage_snapshot_, context_, std::move(sample_block), std::move(columns_mask_), max_block_size_) -{ -} - -void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_nodes) -{ - applyFiltersImpl(std::move(added_filter_nodes), true); -} - -void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) -{ - pipeline.init(createPipe()); -} -} diff --git a/src/Storages/System/ReadFromSystemTables.h b/src/Storages/System/ReadFromSystemTables.h deleted file mode 100644 index f8f34363159..00000000000 --- a/src/Storages/System/ReadFromSystemTables.h +++ /dev/null @@ -1,66 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -class ReadFromSystemTables : public SourceStepWithFilter -{ -public: - std::string getName() const override { return "ReadFromSystemTables"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_); - - void applyFilters(ActionDAGNodes added_filter_nodes) override; - -protected: - void applyFiltersImpl(ActionDAGNodes added_filter_nodes, bool need_detached_tables_); - - template - Pipe createPipe() - { - return Pipe(std::make_shared( - std::move(columns_mask), - getOutputStream().header, - max_block_size, - std::move(filtered_databases_column), - std::move(filtered_tables_column), - context)); - } - -private: - std::vector columns_mask; - size_t max_block_size; - - ColumnPtr filtered_databases_column; - ColumnPtr filtered_tables_column; -}; - -class ReadFromSystemDetachedTables : public ReadFromSystemTables -{ -public: - std::string getName() const override { return "ReadFromSystemDetachedTables"; } - void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; - - ReadFromSystemDetachedTables( - const Names & column_names_, - const SelectQueryInfo & query_info_, - const StorageSnapshotPtr & storage_snapshot_, - const ContextPtr & context_, - Block sample_block, - std::vector columns_mask_, - size_t max_block_size_); - - void applyFilters(ActionDAGNodes added_filter_nodes) override; -}; -} diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp index 99c15fdae0e..9805fa2a8cf 100644 --- a/src/Storages/System/StorageSystemDetachedTables.cpp +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -23,6 +22,204 @@ namespace DB { +namespace +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) +{ + Block sample{ + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + auto table_it = database->getDetachedTablesIterator(context, {}, false); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->table()); + } + } + + Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +class DetachedTablesBlockSource : public ISource +{ +public: + DetachedTablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr detached_tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = detached_tables_->size(); + detached_tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + { + detached_tables.insert(detached_tables_->getDataAt(idx).toString()); + } + } + + String getName() const override { return "DetachedTables"; } + +protected: + Chunk generate() override + { + if (done) + return {}; + + MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + continue; + + const bool need_to_check_access_for_tables + = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!detached_tables_it || !detached_tables_it->isValid()) + detached_tables_it = database->getDetachedTablesIterator(context, {}, false); + + for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) + { + const auto detached_table_name = detached_tables_it->table(); + + if (!detached_tables.contains(detached_table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) + continue; + + fillResultColumnsByDetachedTableIterator(result_columns); + ++rows_count; + } + } + + const UInt64 num_rows = result_columns.at(0)->size(); + done = true; + return Chunk(std::move(result_columns), num_rows); + } + +private: + const std::vector columns_mask; + const UInt64 max_block_size; + const ColumnPtr databases; + NameSet detached_tables; + DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; + + void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const + { + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->database()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->table()); + + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->uuid()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->metadataPath()); + + if (columns_mask[src_index++]) + result_columns[res_index++]->insert(detached_tables_it->isPermanently()); + } +}; + +} + +class ReadFromSystemDetachedTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemDetachedTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_); + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::vector columns_mask; + size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; +}; + StorageSystemDetachedTables::StorageSystemDetachedTables(const StorageID & table_id_) : IStorage(table_id_) { StorageInMemoryMetadata storage_metadata; @@ -60,4 +257,42 @@ void StorageSystemDetachedTables::read( query_plan.addStep(std::move(reading)); } + +ReadFromSystemDetachedTables::ReadFromSystemDetachedTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter(DataStream{.header = std::move(sample_block)}, column_names_, query_info_, storage_snapshot_, context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) +{ +} + +void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); +} + +void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + auto pipe = Pipe(std::make_shared( + std::move(columns_mask), + getOutputStream().header, + max_block_size, + std::move(filtered_databases_column), + std::move(filtered_tables_column), + context)); + pipeline.init(std::move(pipe)); +} } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 40190f53fd3..783b899c978 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -1,30 +1,29 @@ -#include #include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include +#include #include @@ -106,6 +105,628 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) setInMemoryMetadata(storage_metadata); } + +namespace +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) +{ + Block sample { + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine") + }; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } + } + + Block block {ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + + +class TablesBlockSource : public ISource +{ +public: + TablesBlockSource( + std::vector columns_mask_, + Block header, + UInt64 max_block_size_, + ColumnPtr databases_, + ColumnPtr tables_, + ContextPtr context_) + : ISource(std::move(header)) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + , databases(std::move(databases_)) + , context(Context::createCopy(context_)) + { + size_t size = tables_->size(); + tables.reserve(size); + for (size_t idx = 0; idx < size; ++idx) + tables.insert(tables_->getDataAt(idx).toString()); + } + + String getName() const override { return "Tables"; } + +protected: + Chunk generate() override + { + if (done) + return {}; + + MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); + + const auto access = context->getAccess(); + const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + size_t rows_count = 0; + while (rows_count < max_block_size) + { + if (tables_it && !tables_it->isValid()) + ++database_idx; + + while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) + { + database_name = databases->getDataAt(database_idx).toString(); + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + + if (!database) + { + /// Database was deleted just now or the user has no access. + ++database_idx; + continue; + } + + break; + } + + /// This is for temporary tables. They are output in single block regardless to max_block_size. + if (database_idx >= databases->size()) + { + if (context->hasSessionContext()) + { + Tables external_tables = context->getSessionContext()->getExternalTables(); + + for (auto & table : external_tables) + { + size_t src_index = 0; + size_t res_index = 0; + + // database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // name + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.first); + + // uuid + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getStorageID().uuid); + + // engine + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + // is_temporary + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(1u); + + // data_paths + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_path + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_modification_time + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // metadata_version + // Temporary tables does not support replication + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_database + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // dependencies_table + if (columns_mask[src_index++]) + res_columns[res_index++]->insertDefault(); + + // create_table_query + if (columns_mask[src_index++]) + { + auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); + ASTPtr ast = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + } + + // engine_full + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table.second->getName()); + + const auto & settings = context->getSettingsRef(); + while (src_index < columns_mask.size()) + { + // total_rows + if (src_index == 19 && columns_mask[src_index]) + { + if (auto total_rows = table.second->totalRows(settings)) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + // total_bytes + else if (src_index == 20 && columns_mask[src_index]) + { + if (auto total_bytes = table.second->totalBytes(settings)) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + /// Fill the rest columns with defaults + else if (columns_mask[src_index]) + res_columns[res_index++]->insertDefault(); + src_index++; + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + done = true; + return Chunk(std::move(res_columns), num_rows); + } + + const bool need_to_check_access_for_tables = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); + + if (!tables_it || !tables_it->isValid()) + tables_it = database->getTablesIterator(context); + + const bool need_table = needTable(database, getPort().getHeader()); + + for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) + { + auto table_name = tables_it->name(); + if (!tables.contains(table_name)) + continue; + + if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) + continue; + + StoragePtr table = nullptr; + TableLockHolder lock; + if (need_table) + { + table = tables_it->table(); + if (!table) + // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) + continue; + + /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) + /// and it's not protected internally by other mutexes + static const size_t DATA_PATHS_INDEX = 5; + if (columns_mask[DATA_PATHS_INDEX]) + { + lock = table->tryLockForShare(context->getCurrentQueryId(), + context->getSettingsRef().lock_acquire_timeout); + if (!lock) + // Table was dropped while acquiring the lock, skipping table + continue; + } + } + ++rows_count; + + size_t src_index = 0; + size_t res_index = 0; + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(table_name); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(tables_it->uuid()); + + if (columns_mask[src_index++]) + { + chassert(table != nullptr); + res_columns[res_index++]->insert(table->getName()); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(0u); // is_temporary + + if (columns_mask[src_index++]) + { + chassert(lock != nullptr); + Array table_paths_array; + auto paths = table->getDataPaths(); + table_paths_array.reserve(paths.size()); + for (const String & path : paths) + table_paths_array.push_back(path); + res_columns[res_index++]->insert(table_paths_array); + /// We don't need the lock anymore + lock = nullptr; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); + + StorageMetadataPtr metadata_snapshot; + if (table) + metadata_snapshot = table->getInMemoryMetadataPtr(); + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && table->supportsReplication()) + res_columns[res_index++]->insert(metadata_snapshot->metadata_version); + else + res_columns[res_index++]->insertDefault(); + } + + { + Array views_table_name_array; + Array views_database_name_array; + if (columns_mask[src_index] || columns_mask[src_index + 1]) + { + const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); + + views_table_name_array.reserve(view_ids.size()); + views_database_name_array.reserve(view_ids.size()); + for (const auto & view_id : view_ids) + { + views_table_name_array.push_back(view_id.table_name); + views_database_name_array.push_back(view_id.database_name); + } + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_database_name_array); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(views_table_name_array); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) + { + ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); + auto * ast_create = ast ? ast->as() : nullptr; + + if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) + { + ast_create->uuid = UUIDHelpers::Nil; + ast_create->to_inner_uuid = UUIDHelpers::Nil; + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); + + if (columns_mask[src_index++]) + { + String engine_full; + + if (ast_create && ast_create->storage) + { + engine_full = format({context, *ast_create->storage}); + + static const char * const extra_head = " ENGINE = "; + if (startsWith(engine_full, extra_head)) + engine_full = engine_full.substr(strlen(extra_head)); + } + + res_columns[res_index++]->insert(engine_full); + } + + if (columns_mask[src_index++]) + { + String as_select; + if (ast_create && ast_create->select) + as_select = format({context, *ast_create->select}); + res_columns[res_index++]->insert(as_select); + } + } + else + src_index += 3; + + ASTPtr expression_ptr; + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) + res_columns[res_index++]->insert(format({context, *expression_ptr})); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto policy = table ? table->getStoragePolicy() : nullptr; + if (policy) + res_columns[res_index++]->insert(policy->getName()); + else + res_columns[res_index++]->insertDefault(); + } + + auto settings = context->getSettingsRef(); + settings.select_sequential_consistency = 0; + if (columns_mask[src_index++]) + { + auto total_rows = table ? table->totalRows(settings) : std::nullopt; + if (total_rows) + res_columns[res_index++]->insert(*total_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes = table->totalBytes(settings); + if (total_bytes) + res_columns[res_index++]->insert(*total_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); + if (total_bytes_uncompressed) + res_columns[res_index++]->insert(*total_bytes_uncompressed); + else + res_columns[res_index++]->insertDefault(); + } + + auto table_merge_tree = std::dynamic_pointer_cast(table); + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table_merge_tree) + { + res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); + } + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; + if (lifetime_rows) + res_columns[res_index++]->insert(*lifetime_rows); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; + if (lifetime_bytes) + res_columns[res_index++]->insert(*lifetime_bytes); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (metadata_snapshot) + res_columns[res_index++]->insert(metadata_snapshot->comment); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index++]) + { + if (table) + res_columns[res_index++]->insert(table->storesDataOnDisk()); + else + res_columns[res_index++]->insertDefault(); + } + + if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) + { + auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); + auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); + + Array dependencies_databases; + Array dependencies_tables; + dependencies_databases.reserve(dependencies.size()); + dependencies_tables.reserve(dependencies.size()); + for (const auto & dependency : dependencies) + { + dependencies_databases.push_back(dependency.database_name); + dependencies_tables.push_back(dependency.table_name); + } + + Array dependents_databases; + Array dependents_tables; + dependents_databases.reserve(dependents.size()); + dependents_tables.reserve(dependents.size()); + for (const auto & dependent : dependents) + { + dependents_databases.push_back(dependent.database_name); + dependents_tables.push_back(dependent.table_name); + } + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependencies_tables); + + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_databases); + if (columns_mask[src_index++]) + res_columns[res_index++]->insert(dependents_tables); + + } + } + } + + UInt64 num_rows = res_columns.at(0)->size(); + return Chunk(std::move(res_columns), num_rows); + } +private: + std::vector columns_mask; + UInt64 max_block_size; + ColumnPtr databases; + NameSet tables; + size_t database_idx = 0; + DatabaseTablesIteratorPtr tables_it; + ContextPtr context; + bool done = false; + DatabasePtr database; + std::string database_name; +}; + +} + +class ReadFromSystemTables : public SourceStepWithFilter +{ +public: + std::string getName() const override { return "ReadFromSystemTables"; } + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; + + ReadFromSystemTables( + const Names & column_names_, + const SelectQueryInfo & query_info_, + const StorageSnapshotPtr & storage_snapshot_, + const ContextPtr & context_, + Block sample_block, + std::vector columns_mask_, + size_t max_block_size_) + : SourceStepWithFilter( + DataStream{.header = std::move(sample_block)}, + column_names_, + query_info_, + storage_snapshot_, + context_) + , columns_mask(std::move(columns_mask_)) + , max_block_size(max_block_size_) + { + } + + void applyFilters(ActionDAGNodes added_filter_nodes) override; + +private: + std::vector columns_mask; + size_t max_block_size; + + ColumnPtr filtered_databases_column; + ColumnPtr filtered_tables_column; +}; + void StorageSystemTables::read( QueryPlan & query_plan, const Names & column_names, @@ -126,4 +747,24 @@ void StorageSystemTables::read( query_plan.addStep(std::move(reading)); } + +void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) +{ + SourceStepWithFilter::applyFilters(std::move(added_filter_nodes)); + + const ActionsDAG::Node * predicate = nullptr; + if (filter_actions_dag) + predicate = filter_actions_dag->getOutputs().at(0); + + filtered_databases_column = getFilteredDatabases(predicate, context); + filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); +} + +void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) +{ + Pipe pipe(std::make_shared( + std::move(columns_mask), getOutputStream().header, max_block_size, std::move(filtered_databases_column), std::move(filtered_tables_column), context)); + pipeline.init(std::move(pipe)); +} + } diff --git a/src/Storages/System/TablesBlockSource.cpp b/src/Storages/System/TablesBlockSource.cpp deleted file mode 100644 index d5481856f1b..00000000000 --- a/src/Storages/System/TablesBlockSource.cpp +++ /dev/null @@ -1,564 +0,0 @@ -#include "TablesBlockSource.h" - -#include - -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace -{ - -/// Avoid heavy operation on tables if we only queried columns that we can get without table object. -/// Otherwise it will require table initialization for Lazy database. -bool needTable(const DatabasePtr & database, const Block & header) -{ - if (database->getEngineName() != "Lazy") - return true; - - static const std::set columns_without_table = {"database", "name", "uuid", "metadata_modification_time"}; - for (const auto & column : header.getColumnsWithTypeAndName()) - { - if (columns_without_table.find(column.name) == columns_without_table.end()) - return true; - } - return false; -} - -} - -Chunk TablesBlockSource::generate() -{ - if (done) - return {}; - - MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - size_t rows_count = 0; - while (rows_count < max_block_size) - { - if (tables_it && !tables_it->isValid()) - ++database_idx; - - while (database_idx < databases->size() && (!tables_it || !tables_it->isValid())) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - { - /// Database was deleted just now or the user has no access. - ++database_idx; - continue; - } - - break; - } - - /// This is for temporary tables. They are output in single block regardless to max_block_size. - if (database_idx >= databases->size()) - { - if (context->hasSessionContext()) - { - Tables external_tables = context->getSessionContext()->getExternalTables(); - - for (auto & table : external_tables) - { - size_t src_index = 0; - size_t res_index = 0; - - // database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // name - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.first); - - // uuid - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getStorageID().uuid); - - // engine - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - // is_temporary - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(1u); - - // data_paths - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_path - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_modification_time - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // metadata_version - // Temporary tables does not support replication - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_database - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // dependencies_table - if (columns_mask[src_index++]) - res_columns[res_index++]->insertDefault(); - - // create_table_query - if (columns_mask[src_index++]) - { - auto temp_db = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); - ASTPtr ast - = temp_db ? temp_db->tryGetCreateTableQuery(table.second->getStorageID().getTableName(), context) : nullptr; - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - } - - // engine_full - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table.second->getName()); - - const auto & settings = context->getSettingsRef(); - while (src_index < columns_mask.size()) - { - // total_rows - if (src_index == 19 && columns_mask[src_index]) - { - if (auto total_rows = table.second->totalRows(settings)) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - // total_bytes - else if (src_index == 20 && columns_mask[src_index]) - { - if (auto total_bytes = table.second->totalBytes(settings)) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - /// Fill the rest columns with defaults - else if (columns_mask[src_index]) - res_columns[res_index++]->insertDefault(); - src_index++; - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - done = true; - return Chunk(std::move(res_columns), num_rows); - } - - const bool need_to_check_access_for_tables - = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!tables_it || !tables_it->isValid()) - tables_it = database->getTablesIterator(context); - - const bool need_table = needTable(database, getPort().getHeader()); - - for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) - { - auto table_name = tables_it->name(); - if (!tables.contains(table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name)) - continue; - - StoragePtr table = nullptr; - TableLockHolder lock; - if (need_table) - { - table = tables_it->table(); - if (!table) - // Table might have just been removed or detached for Lazy engine (see DatabaseLazy::tryGetTable()) - continue; - - /// The only column that requires us to hold a shared lock is data_paths as rename might alter them (on ordinary tables) - /// and it's not protected internally by other mutexes - static const size_t DATA_PATHS_INDEX = 5; - if (columns_mask[DATA_PATHS_INDEX]) - { - lock = table->tryLockForShare(context->getCurrentQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!lock) - // Table was dropped while acquiring the lock, skipping table - continue; - } - } - ++rows_count; - - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(table_name); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(tables_it->uuid()); - - if (columns_mask[src_index++]) - { - chassert(table != nullptr); - res_columns[res_index++]->insert(table->getName()); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(0u); // is_temporary - - if (columns_mask[src_index++]) - { - chassert(lock != nullptr); - Array table_paths_array; - auto paths = table->getDataPaths(); - table_paths_array.reserve(paths.size()); - for (const String & path : paths) - table_paths_array.push_back(path); - res_columns[res_index++]->insert(table_paths_array); - /// We don't need the lock anymore - lock = nullptr; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(database->getObjectMetadataPath(table_name)); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(static_cast(database->getObjectMetadataModificationTime(table_name))); - - StorageMetadataPtr metadata_snapshot; - if (table) - metadata_snapshot = table->getInMemoryMetadataPtr(); - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && table->supportsReplication()) - res_columns[res_index++]->insert(metadata_snapshot->metadata_version); - else - res_columns[res_index++]->insertDefault(); - } - - { - Array views_table_name_array; - Array views_database_name_array; - if (columns_mask[src_index] || columns_mask[src_index + 1]) - { - const auto view_ids = DatabaseCatalog::instance().getDependentViews(StorageID(database_name, table_name)); - - views_table_name_array.reserve(view_ids.size()); - views_database_name_array.reserve(view_ids.size()); - for (const auto & view_id : view_ids) - { - views_table_name_array.push_back(view_id.table_name); - views_database_name_array.push_back(view_id.database_name); - } - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_database_name_array); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(views_table_name_array); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2]) - { - ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); - auto * ast_create = ast ? ast->as() : nullptr; - - if (ast_create && !context->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) - { - ast_create->uuid = UUIDHelpers::Nil; - ast_create->to_inner_uuid = UUIDHelpers::Nil; - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(ast ? format({context, *ast}) : ""); - - if (columns_mask[src_index++]) - { - String engine_full; - - if (ast_create && ast_create->storage) - { - engine_full = format({context, *ast_create->storage}); - - static const char * const extra_head = " ENGINE = "; - if (startsWith(engine_full, extra_head)) - engine_full = engine_full.substr(strlen(extra_head)); - } - - res_columns[res_index++]->insert(engine_full); - } - - if (columns_mask[src_index++]) - { - String as_select; - if (ast_create && ast_create->select) - as_select = format({context, *ast_create->select}); - res_columns[res_index++]->insert(as_select); - } - } - else - src_index += 3; - - ASTPtr expression_ptr; - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPartitionKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot && (expression_ptr = metadata_snapshot->getSamplingKeyAST())) - res_columns[res_index++]->insert(format({context, *expression_ptr})); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto policy = table ? table->getStoragePolicy() : nullptr; - if (policy) - res_columns[res_index++]->insert(policy->getName()); - else - res_columns[res_index++]->insertDefault(); - } - - auto settings = context->getSettingsRef(); - settings.select_sequential_consistency = 0; - if (columns_mask[src_index++]) - { - auto total_rows = table ? table->totalRows(settings) : std::nullopt; - if (total_rows) - res_columns[res_index++]->insert(*total_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes = table->totalBytes(settings); - if (total_bytes) - res_columns[res_index++]->insert(*total_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto total_bytes_uncompressed = table->totalBytesUncompressed(settings); - if (total_bytes_uncompressed) - res_columns[res_index++]->insert(*total_bytes_uncompressed); - else - res_columns[res_index++]->insertDefault(); - } - - auto table_merge_tree = std::dynamic_pointer_cast(table); - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getAllPartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - res_columns[res_index++]->insert(table_merge_tree->getActivePartsCount()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table_merge_tree) - { - res_columns[res_index++]->insert(table_merge_tree->getTotalMarksCount()); - } - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_rows = table ? table->lifetimeRows() : std::nullopt; - if (lifetime_rows) - res_columns[res_index++]->insert(*lifetime_rows); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - auto lifetime_bytes = table ? table->lifetimeBytes() : std::nullopt; - if (lifetime_bytes) - res_columns[res_index++]->insert(*lifetime_bytes); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (metadata_snapshot) - res_columns[res_index++]->insert(metadata_snapshot->comment); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index++]) - { - if (table) - res_columns[res_index++]->insert(table->storesDataOnDisk()); - else - res_columns[res_index++]->insertDefault(); - } - - if (columns_mask[src_index] || columns_mask[src_index + 1] || columns_mask[src_index + 2] || columns_mask[src_index + 3]) - { - auto dependencies = DatabaseCatalog::instance().getLoadingDependencies(StorageID{database_name, table_name}); - auto dependents = DatabaseCatalog::instance().getLoadingDependents(StorageID{database_name, table_name}); - - Array dependencies_databases; - Array dependencies_tables; - dependencies_databases.reserve(dependencies.size()); - dependencies_tables.reserve(dependencies.size()); - for (const auto & dependency : dependencies) - { - dependencies_databases.push_back(dependency.database_name); - dependencies_tables.push_back(dependency.table_name); - } - - Array dependents_databases; - Array dependents_tables; - dependents_databases.reserve(dependents.size()); - dependents_tables.reserve(dependents.size()); - for (const auto & dependent : dependents) - { - dependents_databases.push_back(dependent.database_name); - dependents_tables.push_back(dependent.table_name); - } - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependencies_tables); - - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_databases); - if (columns_mask[src_index++]) - res_columns[res_index++]->insert(dependents_tables); - } - } - } - - UInt64 num_rows = res_columns.at(0)->size(); - return Chunk(std::move(res_columns), num_rows); -} - -Chunk DetachedTablesBlockSource::generate() -{ - if (done) - return {}; - - MutableColumns result_columns = getPort().getHeader().cloneEmptyColumns(); - - const auto access = context->getAccess(); - const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) - { - database_name = databases->getDataAt(database_idx).toString(); - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - - if (!database) - continue; - - const bool need_to_check_access_for_tables - = need_to_check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); - - if (!detached_tables_it || !detached_tables_it->isValid()) - detached_tables_it = database->getDetachedTablesIterator(context, {}, false); - - for (; rows_count < max_block_size && detached_tables_it->isValid(); detached_tables_it->next()) - { - const auto detached_table_name = detached_tables_it->table(); - - if (!detached_tables.contains(detached_table_name)) - continue; - - if (need_to_check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, detached_table_name)) - continue; - - fillResultColumnsByDetachedTableIterator(result_columns); - ++rows_count; - } - } - - const UInt64 num_rows = result_columns.at(0)->size(); - done = true; - return Chunk(std::move(result_columns), num_rows); -} - -void DetachedTablesBlockSource::fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const -{ - size_t src_index = 0; - size_t res_index = 0; - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->database()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->table()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->uuid()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->metadataPath()); - - if (columns_mask[src_index++]) - result_columns[res_index++]->insert(detached_tables_it->isPermanently()); -} -} diff --git a/src/Storages/System/TablesBlockSource.h b/src/Storages/System/TablesBlockSource.h deleted file mode 100644 index a9f2226dca8..00000000000 --- a/src/Storages/System/TablesBlockSource.h +++ /dev/null @@ -1,92 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -class TablesBlockSource : public ISource -{ -public: - TablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = tables_->size(); - tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - tables.insert(tables_->getDataAt(idx).toString()); - } - - String getName() const override { return "Tables"; } - -protected: - Chunk generate() override; - -private: - std::vector columns_mask; - UInt64 max_block_size; - ColumnPtr databases; - NameSet tables; - size_t database_idx = 0; - DatabaseTablesIteratorPtr tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; -}; - -class DetachedTablesBlockSource : public ISource -{ -public: - DetachedTablesBlockSource( - std::vector columns_mask_, - Block header, - UInt64 max_block_size_, - ColumnPtr databases_, - ColumnPtr detached_tables_, - ContextPtr context_) - : ISource(std::move(header)) - , columns_mask(std::move(columns_mask_)) - , max_block_size(max_block_size_) - , databases(std::move(databases_)) - , context(Context::createCopy(context_)) - { - size_t size = detached_tables_->size(); - detached_tables.reserve(size); - for (size_t idx = 0; idx < size; ++idx) - { - detached_tables.insert(detached_tables_->getDataAt(idx).toString()); - } - } - - String getName() const override { return "DetachedTables"; } - -protected: - Chunk generate() override; - -private: - const std::vector columns_mask; - const UInt64 max_block_size; - const ColumnPtr databases; - NameSet detached_tables; - DatabaseDetachedTablesSnapshotIteratorPtr detached_tables_it; - ContextPtr context; - bool done = false; - DatabasePtr database; - std::string database_name; - - void fillResultColumnsByDetachedTableIterator(MutableColumns & result_columns) const; -}; -} From 51d293beb9dbbe915c6bedb92613833db0240450 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 28 Jun 2024 15:29:28 +0000 Subject: [PATCH 14/64] general logic in detail --- .../System/StorageSystemDetachedTables.cpp | 68 +----- src/Storages/System/StorageSystemTables.cpp | 230 ++++++++++-------- src/Storages/System/StorageSystemTables.h | 9 + 3 files changed, 139 insertions(+), 168 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp index 9805fa2a8cf..560056b891a 100644 --- a/src/Storages/System/StorageSystemDetachedTables.cpp +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -25,69 +26,6 @@ namespace DB namespace { -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) -{ - Block sample{ - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - auto table_it = database->getDetachedTablesIterator(context, {}, false); - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->table()); - } - } - - Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - class DetachedTablesBlockSource : public ISource { public: @@ -280,8 +218,8 @@ void ReadFromSystemDetachedTables::applyFilters(ActionDAGNodes added_filter_node if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); + filtered_databases_column = detail::getFilteredDatabases(predicate, context); + filtered_tables_column = detail::getFilteredTables(predicate, filtered_databases_column, context, true); } void ReadFromSystemDetachedTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 783b899c978..dd7f1a6fdc8 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -1,29 +1,30 @@ +#include + +#include #include -#include +#include #include #include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include #include #include #include #include #include -#include +#include +#include +#include +#include +#include +#include #include @@ -31,6 +32,117 @@ namespace DB { +namespace +{ + +/// Avoid heavy operation on tables if we only queried columns that we can get without table object. +/// Otherwise it will require table initialization for Lazy database. +bool needTable(const DatabasePtr & database, const Block & header) +{ + if (database->getEngineName() != "Lazy") + return true; + + static const std::set columns_without_table = {"database", "name", "uuid", "metadata_modification_time"}; + for (const auto & column : header.getColumnsWithTypeAndName()) + { + if (columns_without_table.find(column.name) == columns_without_table.end()) + return true; + } + return false; +} + +template +void fillColumns(T table_it, MutableColumnPtr & database_column, MutableColumnPtr & engine_column) +{ + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } +} + +} + +namespace detail +{ +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) +{ + MutableColumnPtr column = ColumnString::create(); + + const auto databases = DatabaseCatalog::instance().getDatabases(); + for (const auto & database_name : databases | boost::adaptors::map_keys) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + continue; /// We don't want to show the internal database for temporary tables in system.tables + + column->insert(database_name); + } + + Block block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "database")}; + VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); + return block.getByPosition(0).column; +} + +ColumnPtr getFilteredTables( + const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, const bool is_detached) +{ + Block sample{ + ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), + ColumnWithTypeAndName(nullptr, std::make_shared(), "engine")}; + + MutableColumnPtr database_column = ColumnString::create(); + MutableColumnPtr engine_column; + + auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); + if (dag) + { + bool filter_by_engine = false; + for (const auto * input : dag->getInputs()) + if (input->result_name == "engine") + filter_by_engine = true; + + if (filter_by_engine) + engine_column = ColumnString::create(); + } + + for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) + { + const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); + DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + continue; + + if (is_detached) + { + auto table_it = database->getDetachedTablesIterator(context, {}, false); + for (; table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->table()); + } + } + else + { + for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) + { + database_column->insert(table_it->name()); + if (engine_column) + engine_column->insert(table_it->table()->getName()); + } + } + } + + Block block{ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; + if (engine_column) + block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); + + if (dag) + VirtualColumnUtils::filterBlockWithDAG(dag, block, context); + + return block.getByPosition(0).column; +} + +} StorageSystemTables::StorageSystemTables(const StorageID & table_id_) : IStorage(table_id_) @@ -105,92 +217,6 @@ StorageSystemTables::StorageSystemTables(const StorageID & table_id_) setInMemoryMetadata(storage_metadata); } - -namespace -{ - -ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context) -{ - MutableColumnPtr column = ColumnString::create(); - - const auto databases = DatabaseCatalog::instance().getDatabases(); - for (const auto & database_name : databases | boost::adaptors::map_keys) - { - if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) - continue; /// We don't want to show the internal database for temporary tables in system.tables - - column->insert(database_name); - } - - Block block { ColumnWithTypeAndName(std::move(column), std::make_shared(), "database") }; - VirtualColumnUtils::filterBlockWithPredicate(predicate, block, context); - return block.getByPosition(0).column; -} - -ColumnPtr getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context) -{ - Block sample { - ColumnWithTypeAndName(nullptr, std::make_shared(), "name"), - ColumnWithTypeAndName(nullptr, std::make_shared(), "engine") - }; - - MutableColumnPtr database_column = ColumnString::create(); - MutableColumnPtr engine_column; - - auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(predicate, &sample); - if (dag) - { - bool filter_by_engine = false; - for (const auto * input : dag->getInputs()) - if (input->result_name == "engine") - filter_by_engine = true; - - if (filter_by_engine) - engine_column = ColumnString::create(); - } - - for (size_t database_idx = 0; database_idx < filtered_databases_column->size(); ++database_idx) - { - const auto & database_name = filtered_databases_column->getDataAt(database_idx).toString(); - DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - continue; - - for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } - } - - Block block {ColumnWithTypeAndName(std::move(database_column), std::make_shared(), "name")}; - if (engine_column) - block.insert(ColumnWithTypeAndName(std::move(engine_column), std::make_shared(), "engine")); - - if (dag) - VirtualColumnUtils::filterBlockWithDAG(dag, block, context); - - return block.getByPosition(0).column; -} - -/// Avoid heavy operation on tables if we only queried columns that we can get without table object. -/// Otherwise it will require table initialization for Lazy database. -bool needTable(const DatabasePtr & database, const Block & header) -{ - if (database->getEngineName() != "Lazy") - return true; - - static const std::set columns_without_table = { "database", "name", "uuid", "metadata_modification_time" }; - for (const auto & column : header.getColumnsWithTypeAndName()) - { - if (columns_without_table.find(column.name) == columns_without_table.end()) - return true; - } - return false; -} - - class TablesBlockSource : public ISource { public: @@ -690,8 +716,6 @@ private: std::string database_name; }; -} - class ReadFromSystemTables : public SourceStepWithFilter { public: @@ -756,8 +780,8 @@ void ReadFromSystemTables::applyFilters(ActionDAGNodes added_filter_nodes) if (filter_actions_dag) predicate = filter_actions_dag->getOutputs().at(0); - filtered_databases_column = getFilteredDatabases(predicate, context); - filtered_tables_column = getFilteredTables(predicate, filtered_databases_column, context); + filtered_databases_column = detail::getFilteredDatabases(predicate, context); + filtered_tables_column = detail::getFilteredTables(predicate, filtered_databases_column, context, false); } void ReadFromSystemTables::initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 87cdf1b6a8e..d6e3996b8e3 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -8,6 +8,15 @@ namespace DB class Context; +namespace detail +{ + +ColumnPtr getFilteredDatabases(const ActionsDAG::Node * predicate, ContextPtr context); +ColumnPtr +getFilteredTables(const ActionsDAG::Node * predicate, const ColumnPtr & filtered_databases_column, ContextPtr context, bool is_detached); + +} + /** Implements the system table `tables`, which allows you to get information about all tables. */ From f76bd8ed776bc34829a7ff2de394936dc6035d01 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Tue, 2 Jul 2024 14:58:04 +0000 Subject: [PATCH 15/64] apply some comments --- src/Databases/DatabaseAtomic.cpp | 6 ------ src/Databases/DatabaseAtomic.h | 3 --- src/Databases/DatabaseOnDisk.cpp | 9 ++++++--- src/Databases/TablesLoader.cpp | 10 +++++----- src/Storages/System/StorageSystemTables.cpp | 12 ------------ .../0_stateless/03172_system_detached_tables.sql | 2 +- 6 files changed, 12 insertions(+), 30 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index ddc30627cde..b8d7315fc0f 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -437,12 +437,6 @@ DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::Fil return std::make_unique(std::move(typeid_cast(*base_iter))); } -DatabaseDetachedTablesSnapshotIteratorPtr DatabaseAtomic::getDetachedTablesIterator( - ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name, const bool skip_not_loaded) const -{ - return DatabaseOrdinary::getDetachedTablesIterator(local_context, filter_by_table_name, skip_not_loaded); -} - UUID DatabaseAtomic::tryGetTableUUID(const String & table_name) const { if (auto table = tryGetTable(table_name, getContext())) diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 25c5cdfbde1..4a4ccfa2573 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -49,9 +49,6 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; - DatabaseDetachedTablesSnapshotIteratorPtr - getDetachedTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; - void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 67c9114e793..61de0727ba6 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -22,8 +22,8 @@ #include #include #include -#include "Common/Exception.h" #include +#include #include #include #include @@ -310,11 +310,14 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri FS::createFile(detached_permanently_flag); std::lock_guard lock(mutex); - if (!snapshot_detached_tables.contains(table_name)) + if (const auto it = snapshot_detached_tables.find(table_name); it == snapshot_detached_tables.end()) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Snapshot doesn't contain info about detached table={}", table_name); } - snapshot_detached_tables[table_name].is_permanently = true; + else + { + it->second.is_permanently = true; + } } catch (Exception & e) { diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index 1f70f46da60..6aa13b7b759 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -1,14 +1,14 @@ -#include +#include +#include #include #include -#include -#include -#include #include +#include #include #include -#include #include +#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index dd7f1a6fdc8..43b761d84b1 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -50,18 +50,6 @@ bool needTable(const DatabasePtr & database, const Block & header) } return false; } - -template -void fillColumns(T table_it, MutableColumnPtr & database_column, MutableColumnPtr & engine_column) -{ - for (; table_it->isValid(); table_it->next()) - { - database_column->insert(table_it->name()); - if (engine_column) - engine_column->insert(table_it->table()->getName()); - } -} - } namespace detail diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql index 916821d996b..64f30a9d275 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.sql +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -2,7 +2,7 @@ SELECT 'database atomic tests'; DROP DATABASE IF EXISTS test03172_system_detached_tables; -CREATE DATABASE IF NOT EXISTS test03172_system_detached_tables; +CREATE DATABASE IF NOT EXISTS test03172_system_detached_tables ENGINE=Atomic; CREATE TABLE test03172_system_detached_tables.test_table (n Int64) ENGINE=MergeTree ORDER BY n; SELECT * FROM system.detached_tables WHERE database='test03172_system_detached_tables'; From 192ada5ed93f93c17f26671336c6c62e53a1b669 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 3 Jul 2024 08:37:50 +0000 Subject: [PATCH 16/64] apply comments --- src/Databases/DatabaseAtomic.cpp | 1 - src/Storages/System/StorageSystemDetachedTables.cpp | 13 +++++++++---- .../03172_system_detached_tables.reference | 4 ++-- .../0_stateless/03172_system_detached_tables.sql | 4 ++-- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index b8d7315fc0f..65672f0583c 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -103,7 +103,6 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name, auto table_id = table->getStorageID(); assertDetachedTableNotInUse(table_id.uuid); DatabaseOrdinary::attachTableUnlocked(name, table); - detached_tables.erase(table_id.uuid); table_name_to_path.emplace(std::make_pair(name, relative_table_path)); } diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp index 560056b891a..158edb7818f 100644 --- a/src/Storages/System/StorageSystemDetachedTables.cpp +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -31,12 +31,12 @@ class DetachedTablesBlockSource : public ISource public: DetachedTablesBlockSource( std::vector columns_mask_, - Block header, + Block header_, UInt64 max_block_size_, ColumnPtr databases_, ColumnPtr detached_tables_, ContextPtr context_) - : ISource(std::move(header)) + : ISource(std::move(header_)) , columns_mask(std::move(columns_mask_)) , max_block_size(max_block_size_) , databases(std::move(databases_)) @@ -63,7 +63,9 @@ protected: const auto access = context->getAccess(); const bool need_to_check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - for (size_t database_idx = 0, rows_count = 0; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) + size_t database_idx = 0; + size_t rows_count = 0; + for (; database_idx < databases->size() && rows_count < max_block_size; ++database_idx) { database_name = databases->getDataAt(database_idx).toString(); database = DatabaseCatalog::instance().tryGetDatabase(database_name); @@ -92,8 +94,11 @@ protected: } } + if (databases->size() == database_idx && max_block_size != rows_count) + { + done = true; + } const UInt64 num_rows = result_columns.at(0)->size(); - done = true; return Chunk(std::move(result_columns), num_rows); } diff --git a/tests/queries/0_stateless/03172_system_detached_tables.reference b/tests/queries/0_stateless/03172_system_detached_tables.reference index ce19daa4cdc..83d1ff13942 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.reference +++ b/tests/queries/0_stateless/03172_system_detached_tables.reference @@ -6,6 +6,6 @@ test03172_system_detached_tables test_table_perm 1 test03172_system_detached_tables test_table 0 ----------------------- database lazy tests -test03172_system_detached_tables_lazy test_table 0 -test03172_system_detached_tables_lazy test_table_perm 1 +before attach test03172_system_detached_tables_lazy test_table 0 +before attach test03172_system_detached_tables_lazy test_table_perm 1 DROP TABLE diff --git a/tests/queries/0_stateless/03172_system_detached_tables.sql b/tests/queries/0_stateless/03172_system_detached_tables.sql index 64f30a9d275..1a3c2d7cc0f 100644 --- a/tests/queries/0_stateless/03172_system_detached_tables.sql +++ b/tests/queries/0_stateless/03172_system_detached_tables.sql @@ -39,12 +39,12 @@ CREATE TABLE test03172_system_detached_tables_lazy.test_table_perm (number UInt6 INSERT INTO test03172_system_detached_tables_lazy.test_table_perm SELECT * FROM numbers(100); DETACH table test03172_system_detached_tables_lazy.test_table_perm PERMANENTLY; -SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; +SELECT 'before attach', database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; ATTACH TABLE test03172_system_detached_tables_lazy.test_table; ATTACH TABLE test03172_system_detached_tables_lazy.test_table_perm; -SELECT database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; +SELECT 'after attach', database, table, is_permanently FROM system.detached_tables WHERE database='test03172_system_detached_tables_lazy'; SELECT 'DROP TABLE'; DROP TABLE test03172_system_detached_tables_lazy.test_table SYNC; From 7616f58a0bed80525385879ac3a2a4dcdf2707f4 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 3 Jul 2024 11:36:04 +0000 Subject: [PATCH 17/64] add tests for another db engines --- .../configs/remote_servers.xml | 13 ++++ .../test_system_detached_tables/test.py | 69 +++++++++++++------ 2 files changed, 61 insertions(+), 21 deletions(-) create mode 100644 tests/integration/test_system_detached_tables/configs/remote_servers.xml diff --git a/tests/integration/test_system_detached_tables/configs/remote_servers.xml b/tests/integration/test_system_detached_tables/configs/remote_servers.xml new file mode 100644 index 00000000000..8b579724282 --- /dev/null +++ b/tests/integration/test_system_detached_tables/configs/remote_servers.xml @@ -0,0 +1,13 @@ + + + + + true + + replica1 + 9000 + + + + + \ No newline at end of file diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index dbccac92523..f2bb5d71e85 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -2,8 +2,12 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node_default", stay_alive=True) - +node = cluster.add_instance("replica1", + with_zookeeper=True, + main_configs=["configs/remote_servers.xml"], + macros={"replica": "replica1"}, + stay_alive=True, +) @pytest.fixture(scope="module", autouse=True) def start_cluster(): @@ -13,10 +17,25 @@ def start_cluster(): finally: cluster.shutdown() +@pytest.mark.parametrize( + "db_name,db_engine,table_engine,table_engine_params", + [ + pytest.param( + "test_db_atomic", "Atomic", "MergeTree", "ORDER BY n", id="Atomic db with MergeTree table" + ), + pytest.param( + "test_db_lazy", "Lazy(60)", "Log", "", id="Lazy db with Log table" + ), + pytest.param( + "test_db_repl", "Replicated('/clickhouse/tables/test_table', 'shard1', 'replica1')", "ReplicatedMergeTree", "ORDER BY n", id="Replicated db with ReplicatedMergeTree table" + ), + ], +) +def test_system_detached_tables(start_cluster, db_name, db_engine, table_engine, table_engine_params): + node.query(f"CREATE DATABASE IF NOT EXISTS {db_name} ENGINE={db_engine};") -def test_system_detached_tables(): - node.query("CREATE TABLE test_table (n Int64) ENGINE=MergeTree ORDER BY n;") - node.query("CREATE TABLE test_table_perm (n Int64) ENGINE=MergeTree ORDER BY n;") + node.query(f"CREATE TABLE {db_name}.test_table (n Int64) ENGINE={table_engine} {table_engine_params};") + node.query(f"CREATE TABLE {db_name}.test_table_perm (n Int64) ENGINE={table_engine} {table_engine_params};") test_table_uuid = node.query( "SELECT uuid FROM system.tables WHERE table='test_table'" @@ -32,29 +51,37 @@ def test_system_detached_tables(): "SELECT metadata_path FROM system.tables WHERE table='test_table_perm'" ).rstrip("\n") - assert "" == node.query("SELECT * FROM system.detached_tables") + assert "" == node.query(f"SELECT * FROM system.detached_tables WHERE database='{db_name}'") - node.query("DETACH TABLE test_table") - node.query("DETACH TABLE test_table_perm PERMANENTLY") + node.query(f"SET database_replicated_always_detach_permanently=1; DETACH TABLE {db_name}.test_table") + node.query(f"DETACH TABLE {db_name}.test_table_perm PERMANENTLY") - querry = "SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables FORMAT Values" + querry = f"SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables WHERE database='{db_name}' FORMAT Values" result = node.query(querry) - assert ( - result - == f"('default','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" - ) + + if db_engine.startswith("Repl"): + expected_before_restart = f"('{db_name}','test_table',1,'{test_table_uuid}','{test_table_metadata_path}'),('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + else: + expected_before_restart = f"('{db_name}','test_table',0,'{test_table_uuid}','{test_table_metadata_path}'),('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + + assert result == expected_before_restart + + if db_engine.startswith("Lazy"): + return + node.restart_clickhouse() + if db_engine.startswith("Repl"): + expected_after_restart = expected_before_restart + else: + expected_after_restart = f"('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" + result = node.query(querry) - assert ( - result - == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" - ) + assert result == expected_after_restart node.restart_clickhouse() result = node.query(querry) - assert ( - result - == f"('default','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" - ) + assert result == expected_after_restart + + node.query(f"DROP DATABASE {db_name}") From 1bb1e0189207c458550ab677983e3efeebad23b1 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 3 Jul 2024 12:12:09 +0000 Subject: [PATCH 18/64] fix style --- .../test_system_detached_tables/test.py | 42 ++++++++++++++----- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index f2bb5d71e85..dfb55dfd99b 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -2,7 +2,8 @@ import pytest from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("replica1", +node = cluster.add_instance( + "replica1", with_zookeeper=True, main_configs=["configs/remote_servers.xml"], macros={"replica": "replica1"}, @@ -17,25 +18,42 @@ def start_cluster(): finally: cluster.shutdown() + @pytest.mark.parametrize( "db_name,db_engine,table_engine,table_engine_params", [ pytest.param( - "test_db_atomic", "Atomic", "MergeTree", "ORDER BY n", id="Atomic db with MergeTree table" + "test_db_atomic", + "Atomic", + "MergeTree", + "ORDER BY n", + id="Atomic db with MergeTree table", ), pytest.param( - "test_db_lazy", "Lazy(60)", "Log", "", id="Lazy db with Log table" + "test_db_lazy", + "Lazy(60)", + "Log", + "", + id="Lazy db with Log table", ), pytest.param( - "test_db_repl", "Replicated('/clickhouse/tables/test_table', 'shard1', 'replica1')", "ReplicatedMergeTree", "ORDER BY n", id="Replicated db with ReplicatedMergeTree table" + "test_db_repl", + "Replicated('/clickhouse/tables/test_table','shard1', 'replica1')", + "ReplicatedMergeTree", + "ORDER BY n", + id="Replicated db with ReplicatedMergeTree table", ), ], ) def test_system_detached_tables(start_cluster, db_name, db_engine, table_engine, table_engine_params): node.query(f"CREATE DATABASE IF NOT EXISTS {db_name} ENGINE={db_engine};") - node.query(f"CREATE TABLE {db_name}.test_table (n Int64) ENGINE={table_engine} {table_engine_params};") - node.query(f"CREATE TABLE {db_name}.test_table_perm (n Int64) ENGINE={table_engine} {table_engine_params};") + node.query( + f"CREATE TABLE {db_name}.test_table (n Int64) ENGINE={table_engine} {table_engine_params};" + ) + node.query( + f"CREATE TABLE {db_name}.test_table_perm (n Int64) ENGINE={table_engine} {table_engine_params};" + ) test_table_uuid = node.query( "SELECT uuid FROM system.tables WHERE table='test_table'" @@ -51,14 +69,18 @@ def test_system_detached_tables(start_cluster, db_name, db_engine, table_engine, "SELECT metadata_path FROM system.tables WHERE table='test_table_perm'" ).rstrip("\n") - assert "" == node.query(f"SELECT * FROM system.detached_tables WHERE database='{db_name}'") + assert "" == node.query( + f"SELECT * FROM system.detached_tables WHERE database='{db_name}'" + ) - node.query(f"SET database_replicated_always_detach_permanently=1; DETACH TABLE {db_name}.test_table") + node.query( + f"SET database_replicated_always_detach_permanently=1; DETACH TABLE {db_name}.test_table" + ) node.query(f"DETACH TABLE {db_name}.test_table_perm PERMANENTLY") querry = f"SELECT database, table, is_permanently, uuid, metadata_path FROM system.detached_tables WHERE database='{db_name}' FORMAT Values" result = node.query(querry) - + if db_engine.startswith("Repl"): expected_before_restart = f"('{db_name}','test_table',1,'{test_table_uuid}','{test_table_metadata_path}'),('{db_name}','test_table_perm',1,'{test_table_perm_uuid}','{test_table_perm_metadata_path}')" else: @@ -68,7 +90,7 @@ def test_system_detached_tables(start_cluster, db_name, db_engine, table_engine, if db_engine.startswith("Lazy"): return - + node.restart_clickhouse() if db_engine.startswith("Repl"): From f964b0b6ab48bd92a784a5c719817e2c82fa1f4a Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Wed, 3 Jul 2024 12:22:32 +0000 Subject: [PATCH 19/64] fix style again --- tests/integration/test_system_detached_tables/test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_system_detached_tables/test.py b/tests/integration/test_system_detached_tables/test.py index dfb55dfd99b..2eb870efcbc 100644 --- a/tests/integration/test_system_detached_tables/test.py +++ b/tests/integration/test_system_detached_tables/test.py @@ -10,6 +10,7 @@ node = cluster.add_instance( stay_alive=True, ) + @pytest.fixture(scope="module", autouse=True) def start_cluster(): try: @@ -38,14 +39,16 @@ def start_cluster(): ), pytest.param( "test_db_repl", - "Replicated('/clickhouse/tables/test_table','shard1', 'replica1')", - "ReplicatedMergeTree", + "Replicated('/clickhouse/tables/test_table','shard1', 'replica1')", + "ReplicatedMergeTree", "ORDER BY n", id="Replicated db with ReplicatedMergeTree table", ), ], ) -def test_system_detached_tables(start_cluster, db_name, db_engine, table_engine, table_engine_params): +def test_system_detached_tables( + start_cluster, db_name, db_engine, table_engine, table_engine_params +): node.query(f"CREATE DATABASE IF NOT EXISTS {db_name} ENGINE={db_engine};") node.query( From cc3e166dec6fde41c3903d486e4e18ba9b2991b2 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Thu, 4 Jul 2024 12:11:15 +0000 Subject: [PATCH 20/64] apply comment --- src/Storages/System/StorageSystemDetachedTables.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemDetachedTables.cpp b/src/Storages/System/StorageSystemDetachedTables.cpp index 158edb7818f..56c5e49b467 100644 --- a/src/Storages/System/StorageSystemDetachedTables.cpp +++ b/src/Storages/System/StorageSystemDetachedTables.cpp @@ -94,7 +94,7 @@ protected: } } - if (databases->size() == database_idx && max_block_size != rows_count) + if (databases->size() == database_idx && (!detached_tables_it || !detached_tables_it->isValid())) { done = true; } From 5e4a244faf99299f0ab4b4f105cc08561c00857b Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 5 Jul 2024 16:52:51 +0200 Subject: [PATCH 21/64] Revert "Revert "insertion deduplication on retries for materialised views"" --- src/Columns/ColumnObject.cpp | 6 + src/Columns/ColumnObject.h | 2 +- src/Common/CollectionOfDerived.h | 184 ++++ src/Core/Settings.h | 6 +- src/Interpreters/AsynchronousInsertQueue.cpp | 19 +- src/Interpreters/InterpreterCheckQuery.cpp | 18 +- src/Interpreters/InterpreterCreateQuery.cpp | 9 +- src/Interpreters/InterpreterExplainQuery.cpp | 8 +- src/Interpreters/InterpreterInsertQuery.cpp | 678 ++++++------ src/Interpreters/InterpreterInsertQuery.h | 17 +- src/Interpreters/Squashing.cpp | 124 +-- src/Interpreters/Squashing.h | 46 +- src/Interpreters/SystemLog.cpp | 8 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Processors/Chunk.cpp | 20 +- src/Processors/Chunk.h | 58 +- .../PullingAsyncPipelineExecutor.cpp | 9 +- .../Executors/PullingPipelineExecutor.cpp | 9 +- .../Formats/Impl/ParquetBlockOutputFormat.cpp | 4 +- src/Processors/IAccumulatingTransform.cpp | 5 +- .../FinishAggregatingInOrderAlgorithm.cpp | 10 +- .../Algorithms/MergeTreePartLevelInfo.h | 12 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 2 +- .../Algorithms/ReplacingSortedAlgorithm.h | 7 +- src/Processors/Merges/IMergingTransform.cpp | 2 +- src/Processors/Merges/IMergingTransform.h | 2 +- src/Processors/Sinks/RemoteSink.h | 2 +- src/Processors/Sinks/SinkToStorage.cpp | 5 +- src/Processors/Sinks/SinkToStorage.h | 5 +- src/Processors/Sources/BlocksSource.h | 5 +- src/Processors/Sources/RemoteSource.cpp | 2 +- .../Sources/SourceFromSingleChunk.cpp | 6 +- .../AggregatingInOrderTransform.cpp | 11 +- .../Transforms/AggregatingInOrderTransform.h | 5 +- .../Transforms/AggregatingTransform.cpp | 16 +- .../Transforms/AggregatingTransform.h | 3 +- .../Transforms/ApplySquashingTransform.h | 14 +- .../Transforms/CountingTransform.cpp | 5 +- .../DeduplicationTokenTransforms.cpp | 236 +++++ .../Transforms/DeduplicationTokenTransforms.h | 237 +++++ .../Transforms/ExpressionTransform.cpp | 2 + .../Transforms/JoiningTransform.cpp | 9 +- src/Processors/Transforms/JoiningTransform.h | 6 +- .../Transforms/MaterializingTransform.cpp | 1 + .../Transforms/MemoryBoundMerging.h | 6 +- ...gingAggregatedMemoryEfficientTransform.cpp | 36 +- ...ergingAggregatedMemoryEfficientTransform.h | 5 +- .../Transforms/MergingAggregatedTransform.cpp | 10 +- .../Transforms/PlanSquashingTransform.cpp | 15 +- .../Transforms/PlanSquashingTransform.h | 3 +- .../Transforms/SelectByIndicesTransform.h | 3 +- .../Transforms/SquashingTransform.cpp | 18 +- .../Transforms/TotalsHavingTransform.cpp | 6 +- .../Transforms/buildPushingToViewsChain.cpp | 127 ++- src/QueryPipeline/QueryPipelineBuilder.h | 2 +- src/QueryPipeline/QueryPlanResourceHolder.cpp | 8 +- src/QueryPipeline/QueryPlanResourceHolder.h | 3 + src/Server/TCPHandler.cpp | 20 +- src/Storages/Distributed/DistributedSink.cpp | 20 +- src/Storages/Distributed/DistributedSink.h | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 9 +- src/Storages/Kafka/StorageKafka.cpp | 8 +- src/Storages/LiveView/LiveViewSink.h | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 18 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MaterializedView/RefreshTask.cpp | 8 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 17 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.cpp | 5 +- src/Storages/MergeTree/MergeTreeSink.cpp | 68 +- src/Storages/MergeTree/MergeTreeSink.h | 3 +- src/Storages/MergeTree/MutateTask.cpp | 19 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 94 +- .../MergeTree/ReplicatedMergeTreeSink.h | 13 +- src/Storages/MessageQueueSink.cpp | 2 +- src/Storages/MessageQueueSink.h | 2 +- src/Storages/NATS/StorageNATS.cpp | 8 +- .../StorageObjectStorageSink.cpp | 4 +- .../ObjectStorage/StorageObjectStorageSink.h | 2 +- .../StorageObjectStorageQueue.cpp | 8 +- src/Storages/PartitionedSink.cpp | 4 +- src/Storages/PartitionedSink.h | 2 +- .../MaterializedPostgreSQLConsumer.cpp | 8 +- .../PostgreSQLReplicationHandler.cpp | 8 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 8 +- .../RocksDB/EmbeddedRocksDBBulkSink.cpp | 7 +- .../RocksDB/EmbeddedRocksDBBulkSink.h | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.cpp | 2 +- src/Storages/RocksDB/EmbeddedRocksDBSink.h | 2 +- .../RocksDB/StorageEmbeddedRocksDB.cpp | 3 +- src/Storages/StorageBuffer.cpp | 10 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/StorageFile.cpp | 4 +- src/Storages/StorageKeeperMap.cpp | 9 +- src/Storages/StorageLog.cpp | 8 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMongoDB.cpp | 5 +- src/Storages/StorageMySQL.cpp | 4 +- src/Storages/StoragePostgreSQL.cpp | 4 +- src/Storages/StorageRedis.cpp | 9 +- src/Storages/StorageSQLite.cpp | 2 +- src/Storages/StorageSet.cpp | 6 +- src/Storages/StorageStripeLog.cpp | 4 +- src/Storages/StorageURL.cpp | 4 +- src/Storages/StorageURL.h | 2 +- .../System/StorageSystemZooKeeper.cpp | 2 +- src/Storages/WindowView/StorageWindowView.cpp | 59 +- src/Storages/WindowView/StorageWindowView.h | 2 +- .../test_force_deduplication/test.py | 73 +- ...view_and_deduplication_zookeeper.reference | 2 +- ...lized_view_and_deduplication_zookeeper.sql | 2 +- ...lized_view_and_too_many_parts_zookeeper.sh | 4 +- .../0_stateless/01275_parallel_mv.reference | 4 +- ...01927_query_views_log_current_database.sql | 1 + ...ication_token_materialized_views.reference | 14 +- ...deduplication_token_materialized_views.sql | 8 +- .../0_stateless/02125_query_views_log.sql | 2 +- ...02912_ingestion_mv_deduplication.reference | 5 +- .../02912_ingestion_mv_deduplication.sql | 5 +- .../0_stateless/03008_deduplication.python | 657 ++++++++++++ ...08_deduplication_cases_from_docs.reference | 41 + .../03008_deduplication_cases_from_docs.sql | 331 ++++++ ...on_insert_into_partitioned_table.reference | 35 + ...lication_insert_into_partitioned_table.sql | 83 ++ ...ert_several_blocks_nonreplicated.reference | 962 ++++++++++++++++++ ...ion_insert_several_blocks_nonreplicated.sh | 59 ++ ...insert_several_blocks_replicated.reference | 962 ++++++++++++++++++ ...cation_insert_several_blocks_replicated.sh | 59 ++ ...tes_several_blocks_nonreplicated.reference | 962 ++++++++++++++++++ ..._generates_several_blocks_nonreplicated.sh | 59 ++ ...erates_several_blocks_replicated.reference | 962 ++++++++++++++++++ ..._mv_generates_several_blocks_replicated.sh | 59 ++ ..._mv_into_one_table_nonreplicated.reference | 706 +++++++++++++ ...several_mv_into_one_table_nonreplicated.sh | 59 ++ ...ral_mv_into_one_table_replicated.reference | 706 +++++++++++++ ...on_several_mv_into_one_table_replicated.sh | 59 ++ .../03035_max_insert_threads_support.sh | 2 +- 138 files changed, 8638 insertions(+), 857 deletions(-) create mode 100644 src/Common/CollectionOfDerived.h create mode 100644 src/Processors/Transforms/DeduplicationTokenTransforms.cpp create mode 100644 src/Processors/Transforms/DeduplicationTokenTransforms.h create mode 100644 tests/queries/0_stateless/03008_deduplication.python create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh create mode 100644 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference create mode 100755 tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 90ef974010c..ded56b60e64 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -1093,4 +1093,10 @@ void ColumnObject::finalize() checkObjectHasNoAmbiguosPaths(getKeys()); } +void ColumnObject::updateHashFast(SipHash & hash) const +{ + for (const auto & entry : subcolumns) + for (auto & part : entry->data.data) + part->updateHashFast(hash); +} } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index e2936b27994..b1b8827622f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -242,7 +242,7 @@ public: const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeConcrete(); } - void updateHashFast(SipHash &) const override { throwMustBeConcrete(); } + void updateHashFast(SipHash & hash) const override; void expand(const Filter &, bool) override { throwMustBeConcrete(); } bool hasEqualValues() const override { throwMustBeConcrete(); } size_t byteSizeAt(size_t) const override { throwMustBeConcrete(); } diff --git a/src/Common/CollectionOfDerived.h b/src/Common/CollectionOfDerived.h new file mode 100644 index 00000000000..97c0c3fbc06 --- /dev/null +++ b/src/Common/CollectionOfDerived.h @@ -0,0 +1,184 @@ +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/* This is a collections of objects derived from ItemBase. +* Collection contains no more than one instance for each derived type. +* The derived type is used to access the instance. +*/ + +template +class CollectionOfDerivedItems +{ +public: + using Self = CollectionOfDerivedItems; + using ItemPtr = std::shared_ptr; + +private: + struct Rec + { + std::type_index type_idx; + ItemPtr ptr; + + bool operator<(const Rec & other) const + { + return type_idx < other.type_idx; + } + + bool operator<(const std::type_index & value) const + { + return type_idx < value; + } + + bool operator==(const Rec & other) const + { + return type_idx == other.type_idx; + } + }; + using Records = std::vector; + +public: + void swap(Self & other) noexcept + { + records.swap(other.records); + } + + void clear() + { + records.clear(); + } + + bool empty() const + { + return records.empty(); + } + + size_t size() const + { + return records.size(); + } + + Self clone() const + { + Self result; + result.records.reserve(records.size()); + for (const auto & rec : records) + result.records.emplace_back(rec.type_idx, rec.ptr->clone()); + return result; + } + + void append(Self && other) + { + auto middle_idx = records.size(); + std::move(other.records.begin(), other.records.end(), std::back_inserter(records)); + std::inplace_merge(records.begin(), records.begin() + middle_idx, records.end()); + chassert(isUniqTypes()); + } + + template + void add(std::shared_ptr info) + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + return addImpl(std::type_index(typeid(T)), std::move(info)); + } + + template + std::shared_ptr get() const + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + return cast; + } + + template + std::shared_ptr extract() + { + static_assert(std::is_base_of_v, "Template parameter must inherit items base class"); + auto it = getImpl(std::type_index(typeid(T))); + if (it == records.cend()) + return nullptr; + auto cast = std::dynamic_pointer_cast(it->ptr); + chassert(cast); + + records.erase(it); + return cast; + } + + std::string debug() const + { + std::string result; + + for (auto & rec : records) + { + result.append(rec.type_idx.name()); + result.append(" "); + } + + return result; + } + +private: + bool isUniqTypes() const + { + auto uniq_it = std::adjacent_find(records.begin(), records.end()); + + return uniq_it == records.end(); + } + + void addImpl(std::type_index type_idx, ItemPtr item) + { + auto it = std::lower_bound(records.begin(), records.end(), type_idx); + + if (it == records.end()) + { + records.emplace_back(type_idx, item); + return; + } + + if (it->type_idx == type_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "inserted items must be unique by their type, type {} is inserted twice", type_idx.name()); + + + records.emplace(it, type_idx, item); + + chassert(isUniqTypes()); + } + + Records::const_iterator getImpl(std::type_index type_idx) const + { + auto it = std::lower_bound(records.cbegin(), records.cend(), type_idx); + + if (it == records.cend()) + return records.cend(); + + if (it->type_idx != type_idx) + return records.cend(); + + return it; + } + + Records records; +}; + +} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 5903dbd32eb..81d0aa0c51d 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -36,7 +36,7 @@ class IColumn; M(Dialect, dialect, Dialect::clickhouse, "Which dialect will be used to parse query", 0)\ M(UInt64, min_compress_block_size, 65536, "The actual size of the block to compress, if the uncompressed data less than max_compress_block_size is no less than this value and no less than the volume of data for one mark.", 0) \ M(UInt64, max_compress_block_size, 1048576, "The maximum size of blocks of uncompressed data before compressing for writing to a table.", 0) \ - M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size for reading", 0) \ + M(UInt64, max_block_size, DEFAULT_BLOCK_SIZE, "Maximum block size in rows for reading", 0) \ M(UInt64, max_insert_block_size, DEFAULT_INSERT_BLOCK_SIZE, "The maximum block size for insertion, if we control the creation of blocks for insertion.", 0) \ M(UInt64, min_insert_block_size_rows, DEFAULT_INSERT_BLOCK_SIZE, "Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough.", 0) \ M(UInt64, min_insert_block_size_bytes, (DEFAULT_INSERT_BLOCK_SIZE * 256), "Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough.", 0) \ @@ -634,9 +634,8 @@ class IColumn; M(Bool, optimize_time_filter_with_preimage, true, "Optimize Date and DateTime predicates by converting functions into equivalent comparisons without conversions (e.g. toYear(col) = 2023 -> col >= '2023-01-01' AND col <= '2023-12-31')", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there are constants there", 0) \ - M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ + M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert, true, "Throw exception on INSERT query when the setting `deduplicate_blocks_in_dependent_materialized_views` is enabled along with `async_insert`. It guarantees correctness, because these features can't work together.", 0) \ - M(Bool, update_insert_deduplication_token_in_dependent_materialized_views, false, "Should update insert deduplication token with table identifier during insert in dependent materialized views.", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, ignore_materialized_views_with_dropped_target_table, false, "Ignore MVs with dropped target table during pushing to views", 0) \ M(Bool, allow_experimental_refreshable_materialized_view, false, "Allow refreshable materialized views (CREATE MATERIALIZED VIEW REFRESH ...).", 0) \ @@ -953,6 +952,7 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ + MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index d72f3d81549..dd1166a9228 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -301,7 +301,13 @@ void AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const auto & insert_query = query->as(); insert_query.async_insert_flush = true; - InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); + InterpreterInsertQuery interpreter( + query, + query_context, + query_context->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_insert */ false); auto table = interpreter.getTable(insert_query); auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); @@ -781,7 +787,12 @@ try try { interpreter = std::make_unique( - key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + key.query, + insert_context, + key.settings.insert_allow_materialized_columns, + false, + false, + true); pipeline = interpreter->execute().pipeline; chassert(pipeline.pushing()); @@ -1000,7 +1011,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( } Chunk chunk(executor.getResultColumns(), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } @@ -1052,7 +1063,7 @@ Chunk AsynchronousInsertQueue::processPreprocessedEntries( } Chunk chunk(std::move(result_columns), total_rows); - chunk.setChunkInfo(std::move(chunk_info)); + chunk.getChunkInfos().add(std::move(chunk_info)); return chunk; } diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index 4a84a7bf570..81bb6290acb 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -22,6 +23,7 @@ #include #include +#include #include #include #include @@ -91,7 +93,7 @@ Chunk getChunkFromCheckResult(const String & database, const String & table, con return Chunk(std::move(columns), 1); } -class TableCheckTask : public ChunkInfo +class TableCheckTask : public ChunkInfoCloneable { public: TableCheckTask(StorageID table_id, const std::variant & partition_or_part, ContextPtr context) @@ -110,6 +112,12 @@ public: context->checkAccess(AccessType::SHOW_TABLES, table_->getStorageID()); } + TableCheckTask(const TableCheckTask & other) + : table(other.table) + , check_data_tasks(other.check_data_tasks) + , is_finished(other.is_finished.load()) + {} + std::optional checkNext() const { if (isFinished()) @@ -121,8 +129,8 @@ public: std::this_thread::sleep_for(sleep_time); }); - IStorage::DataValidationTasksPtr check_data_tasks_ = check_data_tasks; - auto result = table->checkDataNext(check_data_tasks_); + IStorage::DataValidationTasksPtr tmp = check_data_tasks; + auto result = table->checkDataNext(tmp); is_finished = !result.has_value(); return result; } @@ -180,7 +188,7 @@ protected: /// source should return at least one row to start pipeline result.addColumn(ColumnUInt8::create(1, 1)); /// actual data stored in chunk info - result.setChunkInfo(std::move(current_check_task)); + result.getChunkInfos().add(std::move(current_check_task)); return result; } @@ -280,7 +288,7 @@ public: protected: void transform(Chunk & chunk) override { - auto table_check_task = std::dynamic_pointer_cast(chunk.getChunkInfo()); + auto table_check_task = chunk.getChunkInfos().get(); auto check_result = table_check_task->checkNext(); if (!check_result) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0ee2bb6c0e9..ee191c02ff8 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1776,8 +1776,13 @@ BlockIO InterpreterCreateQuery::fillTableIfNeeded(const ASTCreateQuery & create) else insert->select = create.select->clone(); - return InterpreterInsertQuery(insert, getContext(), - getContext()->getSettingsRef().insert_allow_materialized_columns).execute(); + return InterpreterInsertQuery( + insert, + getContext(), + getContext()->getSettingsRef().insert_allow_materialized_columns, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false).execute(); } return {}; diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 7c7b4b3f95a..26b7e074fdf 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -534,7 +534,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() } else if (dynamic_cast(ast.getExplainedQuery().get())) { - InterpreterInsertQuery insert(ast.getExplainedQuery(), getContext()); + InterpreterInsertQuery insert( + ast.getExplainedQuery(), + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = insert.execute(); printPipeline(io.pipeline.getProcessors(), buf); } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f396db70d21..2cbfc55d008 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +40,7 @@ #include #include #include +#include "base/defines.h" namespace ProfileEvents @@ -394,28 +397,358 @@ Chain InterpreterInsertQuery::buildPreSinkChain( return out; } +std::pair, std::vector> InterpreterInsertQuery::buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block) +{ + chassert(presink_streams > 0); + chassert(sink_streams > 0); + + ThreadGroupPtr running_group; + if (current_thread) + running_group = current_thread->getThreadGroup(); + if (!running_group) + running_group = std::make_shared(getContext()); + + std::vector sink_chains; + std::vector presink_chains; + + for (size_t i = 0; i < sink_streams; ++i) + { + auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, + running_group, /* elapsed_counter_ms= */ nullptr); + + sink_chains.emplace_back(std::move(out)); + } + + for (size_t i = 0; i < presink_streams; ++i) + { + auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); + presink_chains.emplace_back(std::move(out)); + } + + return {std::move(presink_chains), std::move(sink_chains)}; +} + + +QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table) +{ + const Settings & settings = getContext()->getSettingsRef(); + + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + bool is_trivial_insert_select = false; + + if (settings.optimize_trivial_insert_select) + { + const auto & select_query = query.select->as(); + const auto & selects = select_query.list_of_selects->children; + const auto & union_modes = select_query.list_of_modes; + + /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries + const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; + + is_trivial_insert_select = + std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) + && std::all_of(selects.begin(), selects.end(), isTrivialSelect); + } + + ContextPtr select_context = getContext(); + + if (is_trivial_insert_select) + { + /** When doing trivial INSERT INTO ... SELECT ... FROM table, + * don't need to process SELECT with more than max_insert_threads + * and it's reasonable to set block size for SELECT to the desired block size for INSERT + * to avoid unnecessary squashing. + */ + + Settings new_settings = select_context->getSettings(); + + new_settings.max_threads = std::max(1, settings.max_insert_threads); + + if (table->prefersLargeBlocks()) + { + if (settings.min_insert_block_size_rows) + new_settings.max_block_size = settings.min_insert_block_size_rows; + if (settings.min_insert_block_size_bytes) + new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; + } + + auto context_for_trivial_select = Context::createCopy(context); + context_for_trivial_select->setSettings(new_settings); + context_for_trivial_select->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); + + select_context = context_for_trivial_select; + } + + QueryPipelineBuilder pipeline; + + { + auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); + + if (settings.allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, select_context, select_query_options); + pipeline = interpreter_select_analyzer.buildQueryPipeline(); + } + else + { + InterpreterSelectWithUnionQuery interpreter_select(query.select, select_context, select_query_options); + pipeline = interpreter_select.buildQueryPipeline(); + } + } + + pipeline.dropTotalsAndExtremes(); + + /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. + if (getContext()->getSettingsRef().insert_null_as_default) + { + const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); + const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); + const auto & output_columns = metadata_snapshot->getColumns(); + + if (input_columns.size() == query_columns.size()) + { + for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) + { + /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with + /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. + if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) + && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) + && !isVariant(query_columns[col_idx].type) + && !isDynamic(query_columns[col_idx].type) + && output_columns.has(query_columns[col_idx].name)) + { + query_sample_block.setColumn( + col_idx, + ColumnWithTypeAndName( + makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), + makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), + query_columns[col_idx].name)); + } + } + } + } + + auto actions_dag = ActionsDAG::makeConvertingActions( + pipeline.getHeader().getColumnsWithTypeAndName(), + query_sample_block.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Position); + auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header, actions); + }); + + /// We need to convert Sparse columns to full, because it's destination storage + /// may not support it or may have different settings for applying Sparse serialization. + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + auto context_ptr = getContext(); + auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + + return counting; + }); + + size_t num_select_threads = pipeline.getNumThreads(); + + pipeline.resize(1); + + if (shouldAddSquashingFroStorage(table)) + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, + table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + pipeline.addSimpleTransform([&](const Block &in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + + if (!settings.insert_deduplication_token.value.empty()) + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(settings.insert_deduplication_token.value, in_header); + }); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared(in_header); + }); + } + + /// Number of streams works like this: + /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever + /// InterpreterSelectQuery ends up with. + /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. + /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. + /// * If the table supports parallel inserts, use max_insert_threads for writing to IStorage. + /// Otherwise ResizeProcessor them down to 1 stream. + + size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + + size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; + + if (!settings.parallel_view_processing) + { + auto table_id = table->getStorageID(); + auto views = DatabaseCatalog::instance().getDependentViews(table_id); + + if (table->isView() || !views.empty()) + sink_streams_size = 1; + } + + auto [presink_chains, sink_chains] = buildPreAndSinkChains( + presink_streams_size, sink_streams_size, + table, metadata_snapshot, query_sample_block); + + pipeline.resize(presink_chains.size()); + + if (shouldAddSquashingFroStorage(table)) + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table->prefersLargeBlocks() ? settings.min_insert_block_size_rows : settings.max_block_size, + table->prefersLargeBlocks() ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + + for (auto & chain : presink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(presink_chains)); + + pipeline.resize(sink_streams_size); + + for (auto & chain : sink_chains) + pipeline.addResources(chain.detachResources()); + pipeline.addChains(std::move(sink_chains)); + + if (!settings.parallel_view_processing) + { + /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. + if (pipeline.getNumThreads() > num_select_threads) + pipeline.setMaxThreads(num_select_threads); + } + else if (pipeline.getNumThreads() < settings.max_threads) + { + /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, + /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. + /// + /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. + pipeline.setMaxThreads(settings.max_threads); + } + + pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr + { + return std::make_shared(cur_header); + }); + + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); +} + + +QueryPipeline InterpreterInsertQuery::buildInsertPipeline(ASTInsertQuery & query, StoragePtr table) +{ + const Settings & settings = getContext()->getSettingsRef(); + + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); + + Chain chain; + + { + auto [presink_chains, sink_chains] = buildPreAndSinkChains( + /* presink_streams */1, /* sink_streams */1, + table, metadata_snapshot, query_sample_block); + + chain = std::move(presink_chains.front()); + chain.appendChain(std::move(sink_chains.front())); + } + + if (!settings.insert_deduplication_token.value.empty()) + { + chain.addSource(std::make_shared(chain.getInputHeader())); + chain.addSource(std::make_shared(settings.insert_deduplication_token.value, chain.getInputHeader())); + } + + chain.addSource(std::make_shared(chain.getInputHeader())); + + if (shouldAddSquashingFroStorage(table)) + { + bool table_prefers_large_blocks = table->prefersLargeBlocks(); + + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(balancing)); + } + + auto context_ptr = getContext(); + auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); + counting->setProcessListElement(context_ptr->getProcessListElement()); + counting->setProgressCallback(context_ptr->getProgressCallback()); + chain.addSource(std::move(counting)); + + QueryPipeline pipeline = QueryPipeline(std::move(chain)); + + pipeline.setNumThreads(std::min(pipeline.getNumThreads(), settings.max_threads)); + pipeline.setConcurrencyControl(settings.use_concurrency_control); + + if (query.hasInlinedData() && !async_insert) + { + /// can execute without additional data + auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); + for (auto && buffer : owned_buffers) + format->addBuffer(std::move(buffer)); + + auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); + pipeline.complete(std::move(pipe)); + } + + return pipeline; +} + + BlockIO InterpreterInsertQuery::execute() { const Settings & settings = getContext()->getSettingsRef(); auto & query = query_ptr->as(); - QueryPipelineBuilder pipeline; - std::optional distributed_pipeline; - QueryPlanResourceHolder resources; StoragePtr table = getTable(query); checkStorageSupportsTransactionsIfNeeded(table, getContext()); - StoragePtr inner_table; - if (const auto * mv = dynamic_cast(table.get())) - inner_table = mv->getTargetTable(); - if (query.partition_by && !table->supportsPartitionBy()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "PARTITION BY clause is not supported by storage"); auto table_lock = table->lockForShare(getContext()->getInitialQueryId(), settings.lock_acquire_timeout); - auto metadata_snapshot = table->getInMemoryMetadataPtr(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot, getContext(), no_destination, allow_materialized); /// For table functions we check access while executing @@ -423,320 +756,43 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - if (query.select && settings.parallel_distributed_insert_select) - // Distributed INSERT SELECT - distributed_pipeline = table->distributedWrite(query, getContext()); - - std::vector presink_chains; - std::vector sink_chains; - if (!distributed_pipeline) + if (!allow_materialized) { - /// Number of streams works like this: - /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever - /// InterpreterSelectQuery ends up with. - /// * Use `max_insert_threads` streams for various insert-preparation steps, e.g. - /// materializing and squashing (too slow to do in one thread). That's `presink_chains`. - /// * If the table supports parallel inserts, use the same streams for writing to IStorage. - /// Otherwise ResizeProcessor them down to 1 stream. - /// * If it's not an INSERT SELECT, forget all that and use one stream. - size_t pre_streams_size = 1; - size_t sink_streams_size = 1; - - if (query.select) - { - bool is_trivial_insert_select = false; - - if (settings.optimize_trivial_insert_select) - { - const auto & select_query = query.select->as(); - const auto & selects = select_query.list_of_selects->children; - const auto & union_modes = select_query.list_of_modes; - - /// ASTSelectWithUnionQuery is not normalized now, so it may pass some queries which can be Trivial select queries - const auto mode_is_all = [](const auto & mode) { return mode == SelectUnionMode::UNION_ALL; }; - - is_trivial_insert_select = - std::all_of(union_modes.begin(), union_modes.end(), std::move(mode_is_all)) - && std::all_of(selects.begin(), selects.end(), isTrivialSelect); - } - - if (is_trivial_insert_select) - { - /** When doing trivial INSERT INTO ... SELECT ... FROM table, - * don't need to process SELECT with more than max_insert_threads - * and it's reasonable to set block size for SELECT to the desired block size for INSERT - * to avoid unnecessary squashing. - */ - - Settings new_settings = getContext()->getSettings(); - - new_settings.max_threads = std::max(1, settings.max_insert_threads); - - if (table->prefersLargeBlocks()) - { - if (settings.min_insert_block_size_rows) - new_settings.max_block_size = settings.min_insert_block_size_rows; - if (settings.min_insert_block_size_bytes) - new_settings.preferred_block_size_bytes = settings.min_insert_block_size_bytes; - } - - auto new_context = Context::createCopy(context); - new_context->setSettings(new_settings); - new_context->setInsertionTable(getContext()->getInsertionTable(), getContext()->getInsertionTableColumnNames()); - - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, new_context, select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, new_context, select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - else - { - /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - auto select_query_options = SelectQueryOptions(QueryProcessingStage::Complete, 1); - - if (settings.allow_experimental_analyzer) - { - InterpreterSelectQueryAnalyzer interpreter_select_analyzer(query.select, getContext(), select_query_options); - pipeline = interpreter_select_analyzer.buildQueryPipeline(); - } - else - { - InterpreterSelectWithUnionQuery interpreter_select(query.select, getContext(), select_query_options); - pipeline = interpreter_select.buildQueryPipeline(); - } - } - - pipeline.dropTotalsAndExtremes(); - - if (settings.max_insert_threads > 1) - { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. - const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); - pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads - : std::min(settings.max_insert_threads, pipeline.getNumStreams()); - - /// Deduplication when passing insert_deduplication_token breaks if using more than one thread - if (!settings.insert_deduplication_token.toString().empty()) - { - LOG_DEBUG( - getLogger("InsertQuery"), - "Insert-select query using insert_deduplication_token, setting streams to 1 to avoid deduplication issues"); - pre_streams_size = 1; - } - - if (table->supportsParallelInsert()) - sink_streams_size = pre_streams_size; - } - - pipeline.resize(pre_streams_size); - - /// Allow to insert Nullable into non-Nullable columns, NULL values will be added as defaults values. - if (getContext()->getSettingsRef().insert_null_as_default) - { - const auto & input_columns = pipeline.getHeader().getColumnsWithTypeAndName(); - const auto & query_columns = query_sample_block.getColumnsWithTypeAndName(); - const auto & output_columns = metadata_snapshot->getColumns(); - - if (input_columns.size() == query_columns.size()) - { - for (size_t col_idx = 0; col_idx < query_columns.size(); ++col_idx) - { - /// Change query sample block columns to Nullable to allow inserting nullable columns, where NULL values will be substituted with - /// default column values (in AddingDefaultsTransform), so all values will be cast correctly. - if (isNullableOrLowCardinalityNullable(input_columns[col_idx].type) - && !isNullableOrLowCardinalityNullable(query_columns[col_idx].type) - && !isVariant(query_columns[col_idx].type) - && !isDynamic(query_columns[col_idx].type) - && output_columns.has(query_columns[col_idx].name)) - query_sample_block.setColumn(col_idx, ColumnWithTypeAndName(makeNullableOrLowCardinalityNullable(query_columns[col_idx].column), makeNullableOrLowCardinalityNullable(query_columns[col_idx].type), query_columns[col_idx].name)); - } - } - } - } - - ThreadGroupPtr running_group; - if (current_thread) - running_group = current_thread->getThreadGroup(); - if (!running_group) - running_group = std::make_shared(getContext()); - for (size_t i = 0; i < sink_streams_size; ++i) - { - auto out = buildSink(table, metadata_snapshot, /* thread_status_holder= */ nullptr, - running_group, /* elapsed_counter_ms= */ nullptr); - sink_chains.emplace_back(std::move(out)); - } - for (size_t i = 0; i < pre_streams_size; ++i) - { - auto out = buildPreSinkChain(sink_chains[0].getInputHeader(), table, metadata_snapshot, query_sample_block); - presink_chains.emplace_back(std::move(out)); - } + for (const auto & column : metadata_snapshot->getColumns()) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && query_sample_block.has(column.name)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } BlockIO res; - /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? - if (distributed_pipeline) + if (query.select) { - res.pipeline = std::move(*distributed_pipeline); - } - else if (query.select) - { - const auto & header = presink_chains.at(0).getInputHeader(); - auto actions_dag = ActionsDAG::makeConvertingActions( - pipeline.getHeader().getColumnsWithTypeAndName(), - header.getColumnsWithTypeAndName(), - ActionsDAG::MatchColumnsMode::Position); - auto actions = std::make_shared(actions_dag, ExpressionActionsSettings::fromContext(getContext(), CompileExpressions::yes)); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + if (settings.parallel_distributed_insert_select) { - return std::make_shared(in_header, actions); - }); - - /// We need to convert Sparse columns to full, because it's destination storage - /// may not support it or may have different settings for applying Sparse serialization. - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - auto context_ptr = getContext(); - auto counting = std::make_shared(in_header, nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - - return counting; - }); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - size_t threads = presink_chains.size(); - - pipeline.resize(1); - - pipeline.addTransform(std::make_shared( - header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - - pipeline.resize(threads); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + auto distributed = table->distributedWrite(query, getContext()); + if (distributed) { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); + res.pipeline = std::move(*distributed); + } + else + { + res.pipeline = buildInsertSelectPipeline(query, table); + } } - - size_t num_select_threads = pipeline.getNumThreads(); - - for (auto & chain : presink_chains) - resources = chain.detachResources(); - for (auto & chain : sink_chains) - resources = chain.detachResources(); - - pipeline.addChains(std::move(presink_chains)); - pipeline.resize(sink_chains.size()); - pipeline.addChains(std::move(sink_chains)); - - if (!settings.parallel_view_processing) + else { - /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. - if (pipeline.getNumThreads() > num_select_threads) - pipeline.setMaxThreads(num_select_threads); + res.pipeline = buildInsertSelectPipeline(query, table); } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. - pipeline.setMaxThreads(settings.max_threads); - } - - pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr - { - return std::make_shared(cur_header); - }); - - if (!allow_materialized) - { - for (const auto & column : metadata_snapshot->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); - } - - res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); } else { - auto & chain = presink_chains.at(0); - chain.appendChain(std::move(sink_chains.at(0))); - - if (shouldAddSquashingFroStorage(table)) - { - bool table_prefers_large_blocks = table->prefersLargeBlocks(); - - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(balancing)); - } - - auto context_ptr = getContext(); - auto counting = std::make_shared(chain.getInputHeader(), nullptr, context_ptr->getQuota()); - counting->setProcessListElement(context_ptr->getProcessListElement()); - counting->setProgressCallback(context_ptr->getProgressCallback()); - chain.addSource(std::move(counting)); - - res.pipeline = QueryPipeline(std::move(presink_chains[0])); - res.pipeline.setNumThreads(std::min(res.pipeline.getNumThreads(), settings.max_threads)); - res.pipeline.setConcurrencyControl(settings.use_concurrency_control); - - if (query.hasInlinedData() && !async_insert) - { - /// can execute without additional data - auto format = getInputFormatFromASTInsertQuery(query_ptr, true, query_sample_block, getContext(), nullptr); - for (auto && buffer : owned_buffers) - format->addBuffer(std::move(buffer)); - - auto pipe = getSourceFromInputFormat(query_ptr, std::move(format), getContext(), nullptr); - res.pipeline.complete(std::move(pipe)); - } + res.pipeline = buildInsertPipeline(query, table); } - res.pipeline.addResources(std::move(resources)); - res.pipeline.addStorageHolder(table); - if (inner_table) - res.pipeline.addStorageHolder(inner_table); + + if (const auto * mv = dynamic_cast(table.get())) + res.pipeline.addStorageHolder(mv->getTargetTable()); return res; } @@ -757,17 +813,27 @@ void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, Cont } } + void InterpreterInsertQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr context_) const { extendQueryLogElemImpl(elem, context_); } + void registerInterpreterInsertQuery(InterpreterFactory & factory) { auto create_fn = [] (const InterpreterFactory::Arguments & args) { - return std::make_unique(args.query, args.context, args.allow_materialized); + return std::make_unique( + args.query, + args.context, + args.allow_materialized, + /* no_squash */false, + /* no_destination */false, + /* async_insert */false); }; factory.registerInterpreter("InterpreterInsertQuery", create_fn); } + + } diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index bf73fb2a319..894c7c42144 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -23,10 +23,10 @@ public: InterpreterInsertQuery( const ASTPtr & query_ptr_, ContextPtr context_, - bool allow_materialized_ = false, - bool no_squash_ = false, - bool no_destination_ = false, - bool async_insert_ = false); + bool allow_materialized_, + bool no_squash_, + bool no_destination, + bool async_insert_); /** Prepare a request for execution. Return block streams * - the stream into which you can write data to execute the query, if INSERT; @@ -73,12 +73,17 @@ private: ASTPtr query_ptr; const bool allow_materialized; - const bool no_squash; - const bool no_destination; + bool no_squash = false; + bool no_destination = false; const bool async_insert; std::vector> owned_buffers; + std::pair, std::vector> buildPreAndSinkChains(size_t presink_streams, size_t sink_streams, StoragePtr table, const StorageMetadataPtr & metadata_snapshot, const Block & query_sample_block); + + QueryPipeline buildInsertSelectPipeline(ASTInsertQuery & query, StoragePtr table); + QueryPipeline buildInsertPipeline(ASTInsertQuery & query, StoragePtr table); + Chain buildSink( const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index f8b6a6542cc..25434d1103e 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -11,24 +12,33 @@ namespace ErrorCodes } Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : header(header_) - , min_block_size_rows(min_block_size_rows_) + : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(header_) { } Chunk Squashing::flush() { - return convertToChunk(std::move(chunks_to_merge_vec)); + if (!accumulated) + return {}; + + auto result = convertToChunk(accumulated.extract()); + chassert(result); + return result; } Chunk Squashing::squash(Chunk && input_chunk) { - if (!input_chunk.hasChunkInfo()) + if (!input_chunk) return Chunk(); - const auto *info = getInfoFromChunk(input_chunk); - return squash(info->chunks); + auto squash_info = input_chunk.getChunkInfos().extract(); + + if (!squash_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return squash(std::move(squash_info->chunks), std::move(input_chunk.getChunkInfos())); } Chunk Squashing::add(Chunk && input_chunk) @@ -37,48 +47,37 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; /// Just read block is already enough. - if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + if (isEnoughSize(input_chunk)) { /// If no accumulated data, return just read block. - if (chunks_to_merge_vec.empty()) + if (!accumulated) { - chunks_to_merge_vec.push_back(std::move(input_chunk)); - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - return res_chunk; + accumulated.add(std::move(input_chunk)); + return convertToChunk(accumulated.extract()); } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) + if (isEnoughSize()) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - chunks_to_merge_vec.clear(); - changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(accumulated.extract()); + accumulated.add(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector - expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); - chunks_to_merge_vec.push_back(std::move(input_chunk)); + accumulated.add(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) - { - Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); - changeCurrentSize(0, 0); - chunks_to_merge_vec.clear(); - return res_chunk; - } + if (isEnoughSize()) + return convertToChunk(accumulated.extract()); + return {}; } @@ -90,14 +89,15 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const auto info = std::make_shared(); info->chunks = std::move(chunks); - chunks.clear(); - - return Chunk(header.cloneEmptyColumns(), 0, info); + // It is imortant that chunk is not empty, it has to have columns even if they are empty + auto aggr_chunk = Chunk(header.getColumns(), 0); + aggr_chunk.getChunkInfos().add(std::move(info)); + chassert(aggr_chunk); + return aggr_chunk; } -Chunk Squashing::squash(std::vector & input_chunks) +Chunk Squashing::squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos) { - Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -119,35 +119,17 @@ Chunk Squashing::squash(std::vector & input_chunks) for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { const auto source_column = columns[j]; - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } - accumulated_chunk.setColumns(std::move(mutable_columns), rows); - return accumulated_chunk; -} -const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) -{ - const auto& info = chunk.getChunkInfo(); - const auto * agg_info = typeid_cast(info.get()); + Chunk result; + result.setColumns(std::move(mutable_columns), rows); + result.setChunkInfos(infos); + result.getChunkInfos().append(std::move(input_chunks.back().getChunkInfos())); - if (!agg_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return agg_info; -} - -void Squashing::expandCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows += rows; - accumulated_size.bytes += bytes; -} - -void Squashing::changeCurrentSize(size_t rows, size_t bytes) -{ - accumulated_size.rows = rows; - accumulated_size.bytes = bytes; + chassert(result); + return result; } bool Squashing::isEnoughSize(size_t rows, size_t bytes) const @@ -156,4 +138,28 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +bool Squashing::isEnoughSize() const +{ + return isEnoughSize(accumulated.getRows(), accumulated.getBytes()); +}; + +bool Squashing::isEnoughSize(const Chunk & chunk) const +{ + return isEnoughSize(chunk.getNumRows(), chunk.bytes()); +} + +void Squashing::CurrentSize::add(Chunk && chunk) +{ + rows += chunk.getNumRows(); + bytes += chunk.bytes(); + chunks.push_back(std::move(chunk)); +} + +std::vector Squashing::CurrentSize::extract() +{ + auto result = std::move(chunks); + *this = {}; + return result; +} } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index d76cca60e41..64a9768a71f 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -8,9 +8,18 @@ namespace DB { -struct ChunksToSquash : public ChunkInfo +class ChunksToSquash : public ChunkInfoCloneable { - mutable std::vector chunks = {}; +public: + ChunksToSquash() = default; + ChunksToSquash(const ChunksToSquash & other) + { + chunks.reserve(other.chunks.size()); + for (const auto & chunk: other.chunks) + chunks.push_back(chunk.clone()); + } + + std::vector chunks = {}; }; /** Merging consecutive passed blocks to specified minimum size. @@ -36,32 +45,35 @@ public: static Chunk squash(Chunk && input_chunk); Chunk flush(); - bool isDataLeft() - { - return !chunks_to_merge_vec.empty(); - } + void setHeader(Block header_) { header = std::move(header_); } + const Block & getHeader() const { return header; } - Block header; private: - struct CurrentSize + class CurrentSize { + std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; + + public: + explicit operator bool () const { return !chunks.empty(); } + size_t getRows() const { return rows; } + size_t getBytes() const { return bytes; } + void add(Chunk && chunk); + std::vector extract(); }; - std::vector chunks_to_merge_vec = {}; - size_t min_block_size_rows; - size_t min_block_size_bytes; + const size_t min_block_size_rows; + const size_t min_block_size_bytes; + Block header; - CurrentSize accumulated_size; + CurrentSize accumulated; - static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); - static Chunk squash(std::vector & input_chunks); - - void expandCurrentSize(size_t rows, size_t bytes); - void changeCurrentSize(size_t rows, size_t bytes); + bool isEnoughSize() const; bool isEnoughSize(size_t rows, size_t bytes) const; + bool isEnoughSize(const Chunk & chunk) const; Chunk convertToChunk(std::vector && chunks) const; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 557065b23ff..f386e157b14 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -538,7 +538,13 @@ void SystemLog::flushImpl(const std::vector & to_flush, insert_context->makeQueryContext(); addSettingsForQuery(insert_context, IAST::QueryKind::Insert); - InterpreterInsertQuery interpreter(query_ptr, insert_context); + InterpreterInsertQuery interpreter( + query_ptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); BlockIO io = interpreter.execute(); PushingPipelineExecutor executor(io.pipeline); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index a3c5a7ed3ed..6ce6f5e454e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1188,7 +1188,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select } } - /// Check for dynamic subcolums in unknown required columns. + /// Check for dynamic subcolumns in unknown required columns. if (!unknown_required_source_columns.empty()) { for (const NameAndTypePair & pair : source_columns_ordinary) diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 5f6cf2f7230..4466be5b3a7 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -19,14 +19,6 @@ Chunk::Chunk(DB::Columns columns_, UInt64 num_rows_) : columns(std::move(columns checkNumRowsIsConsistent(); } -Chunk::Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(std::move(columns_)) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - static Columns unmuteColumns(MutableColumns && mutable_columns) { Columns columns; @@ -43,17 +35,11 @@ Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_) checkNumRowsIsConsistent(); } -Chunk::Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_) - : columns(unmuteColumns(std::move(columns_))) - , num_rows(num_rows_) - , chunk_info(std::move(chunk_info_)) -{ - checkNumRowsIsConsistent(); -} - Chunk Chunk::clone() const { - return Chunk(getColumns(), getNumRows(), chunk_info); + auto tmp = Chunk(getColumns(), getNumRows()); + tmp.setChunkInfos(chunk_infos.clone()); + return tmp; } void Chunk::setColumns(Columns columns_, UInt64 num_rows_) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 4f753798eaa..1348966c0d3 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -1,7 +1,9 @@ #pragma once +#include #include -#include + +#include namespace DB { @@ -9,11 +11,29 @@ namespace DB class ChunkInfo { public: - virtual ~ChunkInfo() = default; + using Ptr = std::shared_ptr; + ChunkInfo() = default; + ChunkInfo(const ChunkInfo&) = default; + ChunkInfo(ChunkInfo&&) = default; + + virtual Ptr clone() const = 0; + virtual ~ChunkInfo() = default; }; -using ChunkInfoPtr = std::shared_ptr; + +template +class ChunkInfoCloneable : public ChunkInfo +{ +public: + ChunkInfoCloneable() = default; + ChunkInfoCloneable(const ChunkInfoCloneable & other) = default; + + Ptr clone() const override + { + return std::static_pointer_cast(std::make_shared(*static_cast(this))); + } +}; /** * Chunk is a list of columns with the same length. @@ -32,26 +52,26 @@ using ChunkInfoPtr = std::shared_ptr; class Chunk { public: + using ChunkInfoCollection = CollectionOfDerivedItems; + Chunk() = default; Chunk(const Chunk & other) = delete; Chunk(Chunk && other) noexcept : columns(std::move(other.columns)) , num_rows(other.num_rows) - , chunk_info(std::move(other.chunk_info)) + , chunk_infos(std::move(other.chunk_infos)) { other.num_rows = 0; } Chunk(Columns columns_, UInt64 num_rows_); - Chunk(Columns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk(MutableColumns columns_, UInt64 num_rows_); - Chunk(MutableColumns columns_, UInt64 num_rows_, ChunkInfoPtr chunk_info_); Chunk & operator=(const Chunk & other) = delete; Chunk & operator=(Chunk && other) noexcept { columns = std::move(other.columns); - chunk_info = std::move(other.chunk_info); + chunk_infos = std::move(other.chunk_infos); num_rows = other.num_rows; other.num_rows = 0; return *this; @@ -62,15 +82,15 @@ public: void swap(Chunk & other) noexcept { columns.swap(other.columns); - chunk_info.swap(other.chunk_info); std::swap(num_rows, other.num_rows); + chunk_infos.swap(other.chunk_infos); } void clear() { num_rows = 0; columns.clear(); - chunk_info.reset(); + chunk_infos.clear(); } const Columns & getColumns() const { return columns; } @@ -81,9 +101,9 @@ public: /** Get empty columns with the same types as in block. */ MutableColumns cloneEmptyColumns() const; - const ChunkInfoPtr & getChunkInfo() const { return chunk_info; } - bool hasChunkInfo() const { return chunk_info != nullptr; } - void setChunkInfo(ChunkInfoPtr chunk_info_) { chunk_info = std::move(chunk_info_); } + ChunkInfoCollection & getChunkInfos() { return chunk_infos; } + const ChunkInfoCollection & getChunkInfos() const { return chunk_infos; } + void setChunkInfos(ChunkInfoCollection chunk_infos_) { chunk_infos = std::move(chunk_infos_); } UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } @@ -107,7 +127,7 @@ public: private: Columns columns; UInt64 num_rows = 0; - ChunkInfoPtr chunk_info; + ChunkInfoCollection chunk_infos; void checkNumRowsIsConsistent(); }; @@ -117,11 +137,15 @@ using Chunks = std::vector; /// AsyncInsert needs two kinds of information: /// - offsets of different sub-chunks /// - tokens of different sub-chunks, which are assigned by setting `insert_deduplication_token`. -class AsyncInsertInfo : public ChunkInfo +class AsyncInsertInfo : public ChunkInfoCloneable { public: AsyncInsertInfo() = default; - explicit AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) : offsets(offsets_), tokens(tokens_) {} + AsyncInsertInfo(const AsyncInsertInfo & other) = default; + AsyncInsertInfo(const std::vector & offsets_, const std::vector & tokens_) + : offsets(offsets_) + , tokens(tokens_) + {} std::vector offsets; std::vector tokens; @@ -130,9 +154,11 @@ public: using AsyncInsertInfoPtr = std::shared_ptr; /// Extension to support delayed defaults. AddingDefaultsProcessor uses it to replace missing values with column defaults. -class ChunkMissingValues : public ChunkInfo +class ChunkMissingValues : public ChunkInfoCloneable { public: + ChunkMissingValues(const ChunkMissingValues & other) = default; + using RowsBitMask = std::vector; /// a bit per row for a column const RowsBitMask & getDefaultsBitmask(size_t column_idx) const; diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index d27002197d2..d9fab88fe1f 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -147,13 +147,10 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) block = lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index cbf73c5cb07..25c15d40c9a 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -73,13 +73,10 @@ bool PullingPipelineExecutor::pull(Block & block) } block = pulling_format->getPort(IOutputFormat::PortKind::Main).getHeader().cloneWithColumns(chunk.detachColumns()); - if (auto chunk_info = chunk.getChunkInfo()) + if (auto agg_info = chunk.getChunkInfos().get()) { - if (const auto * agg_info = typeid_cast(chunk_info.get())) - { - block.info.bucket_num = agg_info->bucket_num; - block.info.is_overflows = agg_info->is_overflows; - } + block.info.bucket_num = agg_info->bucket_num; + block.info.is_overflows = agg_info->is_overflows; } return true; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index a5d334f4f1d..9e499e2c400 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -179,7 +179,9 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) columns[i]->insertRangeFrom(*concatenated.getColumns()[i], offset, count); Chunks piece; - piece.emplace_back(std::move(columns), count, concatenated.getChunkInfo()); + piece.emplace_back(std::move(columns), count); + piece.back().setChunkInfos(concatenated.getChunkInfos()); + writeRowGroup(std::move(piece)); } } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 4136fc5a5f2..46be6e74693 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -8,8 +8,9 @@ namespace ErrorCodes } IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_header) - : IProcessor({std::move(input_header)}, {std::move(output_header)}), - input(inputs.front()), output(outputs.front()) + : IProcessor({std::move(input_header)}, {std::move(output_header)}) + , input(inputs.front()) + , output(outputs.front()) { } diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp index 466adf93538..86675bcb237 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.cpp @@ -53,13 +53,11 @@ void FinishAggregatingInOrderAlgorithm::consume(Input & input, size_t source_num if (!input.chunk.hasRows()) return; - const auto & info = input.chunk.getChunkInfo(); - if (!info) + if (input.chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in FinishAggregatingInOrderAlgorithm"); Int64 allocated_bytes = 0; - /// Will be set by AggregatingInOrderTransform during local aggregation; will be nullptr during merging on initiator. - if (const auto * arenas_info = typeid_cast(info.get())) + if (auto arenas_info = input.chunk.getChunkInfos().get()) allocated_bytes = arenas_info->allocated_bytes; states[source_num] = State{input.chunk, description, allocated_bytes}; @@ -136,7 +134,7 @@ Chunk FinishAggregatingInOrderAlgorithm::prepareToMerge() info->chunk_num = chunk_num++; Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -163,7 +161,7 @@ void FinishAggregatingInOrderAlgorithm::addToAggregation() chunks.emplace_back(std::move(new_columns), current_rows); } - chunks.back().setChunkInfo(std::make_shared()); + chunks.back().getChunkInfos().add(std::make_shared()); states[i].current_row = states[i].to_row; /// We assume that sizes in bytes of rows are almost the same. diff --git a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h index bcf4e759024..e4f22deec8d 100644 --- a/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h +++ b/src/Processors/Merges/Algorithms/MergeTreePartLevelInfo.h @@ -6,18 +6,22 @@ namespace DB { /// To carry part level if chunk is produced by a merge tree source -class MergeTreePartLevelInfo : public ChunkInfo +class MergeTreePartLevelInfo : public ChunkInfoCloneable { public: MergeTreePartLevelInfo() = delete; - explicit MergeTreePartLevelInfo(ssize_t part_level) : origin_merge_tree_part_level(part_level) { } + explicit MergeTreePartLevelInfo(ssize_t part_level) + : origin_merge_tree_part_level(part_level) + { } + MergeTreePartLevelInfo(const MergeTreePartLevelInfo & other) = default; + size_t origin_merge_tree_part_level = 0; }; inline size_t getPartLevelFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (const auto * part_level_info = typeid_cast(info.get())) + const auto part_level_info = chunk.getChunkInfos().get(); + if (part_level_info) return part_level_info->origin_merge_tree_part_level; return 0; } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index 7b2c7d82a01..cd347d371d9 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes static IMergingAlgorithm::Status emitChunk(detail::SharedChunkPtr & chunk, bool finished = false) { - chunk->setChunkInfo(std::make_shared(std::move(chunk->replace_final_selection))); + chunk->getChunkInfos().add(std::make_shared(std::move(chunk->replace_final_selection))); return IMergingAlgorithm::Status(std::move(*chunk), finished); } diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index a3ccccf0845..2f23f2a5c4d 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace Poco { @@ -14,11 +15,13 @@ namespace DB /** Use in skipping final to keep list of indices of selected row after merging final */ -struct ChunkSelectFinalIndices : public ChunkInfo +struct ChunkSelectFinalIndices : public ChunkInfoCloneable { + explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); + ChunkSelectFinalIndices(const ChunkSelectFinalIndices & other) = default; + const ColumnPtr column_holder; const ColumnUInt64 * select_final_indices = nullptr; - explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_); }; /** Merges several sorted inputs into one. diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index fbb47969b2f..b1b0182a113 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -157,7 +157,7 @@ IProcessor::Status IMergingTransformBase::prepare() bool is_port_full = !output.canPush(); /// Push if has data. - if ((state.output_chunk || state.output_chunk.hasChunkInfo()) && !is_port_full) + if ((state.output_chunk || !state.output_chunk.getChunkInfos().empty()) && !is_port_full) output.push(std::move(state.output_chunk)); if (!is_initialized) diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index c218f622870..be629271736 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -129,7 +129,7 @@ public: IMergingAlgorithm::Status status = algorithm.merge(); - if ((status.chunk && status.chunk.hasRows()) || status.chunk.hasChunkInfo()) + if ((status.chunk && status.chunk.hasRows()) || !status.chunk.getChunkInfos().empty()) { // std::cerr << "Got chunk with " << status.chunk.getNumRows() << " rows" << std::endl; state.output_chunk = std::move(status.chunk); diff --git a/src/Processors/Sinks/RemoteSink.h b/src/Processors/Sinks/RemoteSink.h index 30cf958c072..c05cc1defcb 100644 --- a/src/Processors/Sinks/RemoteSink.h +++ b/src/Processors/Sinks/RemoteSink.h @@ -20,7 +20,7 @@ public: } String getName() const override { return "RemoteSink"; } - void consume (Chunk chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.detachColumns())); } + void consume (Chunk & chunk) override { write(RemoteInserter::getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override { RemoteInserter::onFinish(); } }; diff --git a/src/Processors/Sinks/SinkToStorage.cpp b/src/Processors/Sinks/SinkToStorage.cpp index 5f9f9f9b1a1..36bb70f493f 100644 --- a/src/Processors/Sinks/SinkToStorage.cpp +++ b/src/Processors/Sinks/SinkToStorage.cpp @@ -15,9 +15,8 @@ void SinkToStorage::onConsume(Chunk chunk) */ Nested::validateArraySizes(getHeader().cloneWithColumns(chunk.getColumns())); - consume(chunk.clone()); - if (!lastBlockIsDuplicate()) - cur_chunk = std::move(chunk); + consume(chunk); + cur_chunk = std::move(chunk); } SinkToStorage::GenerateResult SinkToStorage::onGenerate() diff --git a/src/Processors/Sinks/SinkToStorage.h b/src/Processors/Sinks/SinkToStorage.h index 023bbd8b094..c728fa87b1e 100644 --- a/src/Processors/Sinks/SinkToStorage.h +++ b/src/Processors/Sinks/SinkToStorage.h @@ -18,8 +18,7 @@ public: void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } protected: - virtual void consume(Chunk chunk) = 0; - virtual bool lastBlockIsDuplicate() const { return false; } + virtual void consume(Chunk & chunk) = 0; private: std::vector table_locks; @@ -38,7 +37,7 @@ class NullSinkToStorage : public SinkToStorage public: using SinkToStorage::SinkToStorage; std::string getName() const override { return "NullSinkToStorage"; } - void consume(Chunk) override {} + void consume(Chunk &) override {} }; using SinkPtr = std::shared_ptr; diff --git a/src/Processors/Sources/BlocksSource.h b/src/Processors/Sources/BlocksSource.h index ec0dc9609f1..7ac460c14e2 100644 --- a/src/Processors/Sources/BlocksSource.h +++ b/src/Processors/Sources/BlocksSource.h @@ -43,7 +43,10 @@ protected: info->bucket_num = res.info.bucket_num; info->is_overflows = res.info.is_overflows; - return Chunk(res.getColumns(), res.rows(), std::move(info)); + auto chunk = Chunk(res.getColumns(), res.rows()); + chunk.getChunkInfos().add(std::move(info)); + + return chunk; } private: diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 3d7dd3f76b8..1578bd389c9 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -176,7 +176,7 @@ std::optional RemoteSource::tryGenerate() auto info = std::make_shared(); info->bucket_num = block.info.bucket_num; info->is_overflows = block.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } return chunk; diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 00f40a34361..9abe0504d10 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -5,7 +5,9 @@ namespace DB { -SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} +SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) +{ +} SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { @@ -20,7 +22,7 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp auto info = std::make_shared(); info->bucket_num = data.info.bucket_num; info->is_overflows = data.info.is_overflows; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp index 9ffe15d0f85..45b0960ec8f 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp +++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp @@ -332,7 +332,7 @@ void AggregatingInOrderTransform::generate() variants.aggregates_pool = variants.aggregates_pools.at(0).get(); /// Pass info about used memory by aggregate functions further. - to_push_chunk.setChunkInfo(std::make_shared(cur_block_bytes)); + to_push_chunk.getChunkInfos().add(std::make_shared(cur_block_bytes)); cur_block_bytes = 0; cur_block_size = 0; @@ -351,11 +351,12 @@ FinalizeAggregatedTransform::FinalizeAggregatedTransform(Block header, Aggregati void FinalizeAggregatedTransform::transform(Chunk & chunk) { if (params->final) - finalizeChunk(chunk, aggregates_mask); - else if (!chunk.getChunkInfo()) { - auto info = std::make_shared(); - chunk.setChunkInfo(std::move(info)); + finalizeChunk(chunk, aggregates_mask); + } + else if (!chunk.getChunkInfos().get()) + { + chunk.getChunkInfos().add(std::make_shared()); } } diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 5d50e97f552..41a0d7fc7f1 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB { @@ -12,10 +13,12 @@ namespace DB struct InputOrderInfo; using InputOrderInfoPtr = std::shared_ptr; -struct ChunkInfoWithAllocatedBytes : public ChunkInfo +struct ChunkInfoWithAllocatedBytes : public ChunkInfoCloneable { + ChunkInfoWithAllocatedBytes(const ChunkInfoWithAllocatedBytes & other) = default; explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} + Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 65f0612d738..517f035667f 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -35,7 +35,7 @@ Chunk convertToChunk(const Block & block) UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); return chunk; } @@ -44,15 +44,11 @@ namespace { const AggregatedChunkInfo * getInfoFromChunk(const Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); - return agg_info; + return agg_info.get(); } /// Reads chunks from file in native format. Provide chunks with aggregation info. @@ -210,11 +206,7 @@ private: void process(Chunk && chunk) { - if (!chunk.hasChunkInfo()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with chunk info in {}", getName()); - - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected chunk with ChunksToMerge info in {}", getName()); diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index e167acde067..95983c39d1e 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -19,7 +20,7 @@ namespace CurrentMetrics namespace DB { -class AggregatedChunkInfo : public ChunkInfo +class AggregatedChunkInfo : public ChunkInfoCloneable { public: bool is_overflows = false; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 965a084bb13..49a6581e685 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -27,18 +27,12 @@ public: } ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } } protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) - cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); + cur_chunk = Squashing::squash(std::move(chunk)); } GenerateResult onGenerate() override @@ -48,16 +42,10 @@ protected: res.is_done = true; return res; } - void onFinish() override - { - auto chunk = DB::Squashing::squash({}); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); - } private: Squashing squashing; Chunk cur_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/CountingTransform.cpp b/src/Processors/Transforms/CountingTransform.cpp index 3dfb9fe178f..2c6b3bd8638 100644 --- a/src/Processors/Transforms/CountingTransform.cpp +++ b/src/Processors/Transforms/CountingTransform.cpp @@ -1,6 +1,7 @@ - -#include #include + +#include +#include #include #include diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp new file mode 100644 index 00000000000..6786f76cbef --- /dev/null +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -0,0 +1,236 @@ +#include + +#include + +#include +#include +#include + + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void RestoreChunkInfosTransform::transform(Chunk & chunk) +{ + chunk.getChunkInfos().append(chunk_infos.clone()); +} + +namespace DeduplicationToken +{ + +String TokenInfo::getToken() const +{ + if (!isDefined()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is not defined, stage {}, token {}", stage, debugToken()); + + return getTokenImpl(); +} + +String TokenInfo::getTokenImpl() const +{ + String result; + result.reserve(getTotalSize()); + + for (const auto & part : parts) + { + if (!result.empty()) + result.append(":"); + result.append(part); + } + + return result; +} + +String TokenInfo::debugToken() const +{ + return getTokenImpl(); +} + +void TokenInfo::addChunkHash(String part) +{ + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + + if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(std::move(part)); +} + +void TokenInfo::finishChunkHashes() +{ + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_WITH_HASHES; + + if (stage != DEFINE_SOURCE_WITH_HASHES) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + stage = DEFINED; +} + +void TokenInfo::setUserToken(const String & token) +{ + if (stage == UNDEFINED && empty()) + stage = DEFINE_SOURCE_USER_TOKEN; + + if (stage != DEFINE_SOURCE_USER_TOKEN) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("user-token-{}", token)); +} + +void TokenInfo::setSourceWithUserToken(size_t block_number) +{ + if (stage != DEFINE_SOURCE_USER_TOKEN) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("source-number-{}", block_number)); + + stage = DEFINED; +} + +void TokenInfo::setViewID(const String & id) +{ + if (stage == DEFINED) + stage = DEFINE_VIEW; + + if (stage != DEFINE_VIEW) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-id-{}", id)); +} + +void TokenInfo::setViewBlockNumber(size_t block_number) +{ + if (stage != DEFINE_VIEW) + throw Exception(ErrorCodes::LOGICAL_ERROR, "token is in wrong stage {}, token {}", stage, debugToken()); + + addTokenPart(fmt::format("view-block-{}", block_number)); + + stage = DEFINED; +} + +void TokenInfo::reset() +{ + stage = UNDEFINED; + parts.clear(); +} + +void TokenInfo::addTokenPart(String part) +{ + parts.push_back(std::move(part)); +} + +size_t TokenInfo::getTotalSize() const +{ + if (parts.empty()) + return 0; + + size_t size = 0; + for (const auto & part : parts) + size += part.size(); + + // we reserve more size here to be able to add delimenter between parts. + return size + parts.size() - 1; +} + +#ifdef ABORT_ON_LOGICAL_ERROR +void CheckTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + + LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); +} +#endif + +String DefineSourceWithChunkHashTransform::getChunkHash(const Chunk & chunk) +{ + SipHash hash; + for (const auto & colunm : chunk.getColumns()) + colunm->updateHashFast(hash); + + const auto hash_value = hash.get128(); + return toString(hash_value.items[0]) + "_" + toString(hash_value.items[1]); +} + + +void DefineSourceWithChunkHashTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in DefineSourceWithChunkHashesTransform"); + + if (token_info->isDefined()) + return; + + token_info->addChunkHash(getChunkHash(chunk)); + token_info->finishChunkHashes(); +} + +void SetUserTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetUserTokenTransform"); + token_info->setUserToken(user_token); +} + +void SetSourceBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetSourceBlockNumberTransform"); + token_info->setSourceWithUserToken(block_number++); +} + +void SetViewIDTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewIDTransform"); + token_info->setViewID(view_id); +} + +void SetViewBlockNumberTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in SetViewBlockNumberTransform"); + token_info->setViewBlockNumber(block_number++); +} + +void ResetTokenTransform::transform(Chunk & chunk) +{ + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ResetTokenTransform"); + + token_info->reset(); +} + +} +} diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.h b/src/Processors/Transforms/DeduplicationTokenTransforms.h new file mode 100644 index 00000000000..d6aff9e1370 --- /dev/null +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.h @@ -0,0 +1,237 @@ +#pragma once + +#include +#include + +#include +#include "Common/Logger.h" + + +namespace DB +{ + class RestoreChunkInfosTransform : public ISimpleTransform + { + public: + RestoreChunkInfosTransform(Chunk::ChunkInfoCollection chunk_infos_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , chunk_infos(std::move(chunk_infos_)) + {} + + String getName() const override { return "RestoreChunkInfosTransform"; } + + void transform(Chunk & chunk) override; + + private: + Chunk::ChunkInfoCollection chunk_infos; + }; + + +namespace DeduplicationToken +{ + class TokenInfo : public ChunkInfoCloneable + { + public: + TokenInfo() = default; + TokenInfo(const TokenInfo & other) = default; + + String getToken() const; + String debugToken() const; + + bool empty() const { return parts.empty(); } + + bool isDefined() const { return stage == DEFINED; } + + void addChunkHash(String part); + void finishChunkHashes(); + + void setUserToken(const String & token); + void setSourceWithUserToken(size_t block_number); + + void setViewID(const String & id); + void setViewBlockNumber(size_t block_number); + + void reset(); + + private: + String getTokenImpl() const; + + void addTokenPart(String part); + size_t getTotalSize() const; + + /* Token has to be prepared in a particular order. + * BuildingStage ensures that token is expanded according the following order. + * Firstly token is expanded with information about the source. + * It could be done with two ways: add several hash sums from the source chunks or provide user defined deduplication token and its sequentional block number. + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINE_SOURCE_WITH_HASHES // addChunkHash + * DEFINE_SOURCE_WITH_HASHES -> DEFINED // defineSourceWithChankHashes + * + * transition // method + * UNDEFINED -> DEFINE_SOURCE_USER_TOKEN // setUserToken + * DEFINE_SOURCE_USER_TOKEN -> DEFINED // defineSourceWithUserToken + * + * After token is defined, it could be extended with view id and view block number. Actually it has to be expanded with view details if there is one or several views. + * + * transition // method + * DEFINED -> DEFINE_VIEW // setViewID + * DEFINE_VIEW -> DEFINED // defineViewID + */ + + enum BuildingStage + { + UNDEFINED, + DEFINE_SOURCE_WITH_HASHES, + DEFINE_SOURCE_USER_TOKEN, + DEFINE_VIEW, + DEFINED, + }; + + BuildingStage stage = UNDEFINED; + std::vector parts; + }; + + +#ifdef ABORT_ON_LOGICAL_ERROR + /// use that class only with debug builds in CI for introspection + class CheckTokenTransform : public ISimpleTransform + { + public: + CheckTokenTransform(String debug_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , debug(std::move(debug_)) + { + } + + String getName() const override { return "DeduplicationToken::CheckTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String debug; + LoggerPtr log = getLogger("CheckInsertDeduplicationTokenTransform"); + }; +#endif + + + class AddTokenInfoTransform : public ISimpleTransform + { + public: + explicit AddTokenInfoTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::AddTokenInfoTransform"; } + + void transform(Chunk & chunk) override + { + chunk.getChunkInfos().add(std::make_shared()); + } + }; + + + class DefineSourceWithChunkHashTransform : public ISimpleTransform + { + public: + explicit DefineSourceWithChunkHashTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::DefineSourceWithChunkHashesTransform"; } + + // Usually MergeTreeSink/ReplicatedMergeTreeSink calls addChunkHash for the deduplication token with hashes from the parts. + // But if there is some table with different engine, we still need to define the source of the data in deduplication token + // We use that transform to define the source as a hash of entire block in deduplication token + void transform(Chunk & chunk) override; + + static String getChunkHash(const Chunk & chunk); + }; + + class ResetTokenTransform : public ISimpleTransform + { + public: + explicit ResetTokenTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::ResetTokenTransform"; } + + void transform(Chunk & chunk) override; + }; + + + class SetUserTokenTransform : public ISimpleTransform + { + public: + SetUserTokenTransform(String user_token_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , user_token(std::move(user_token_)) + { + } + + String getName() const override { return "DeduplicationToken::SetUserTokenTransform"; } + + void transform(Chunk & chunk) override; + + private: + String user_token; + }; + + + class SetSourceBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetSourceBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetSourceBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number = 0; + }; + + + class SetViewIDTransform : public ISimpleTransform + { + public: + SetViewIDTransform(String view_id_, const Block & header_) + : ISimpleTransform(header_, header_, true) + , view_id(std::move(view_id_)) + { + } + + String getName() const override { return "DeduplicationToken::SetViewIDTransform"; } + + void transform(Chunk & chunk) override; + + private: + String view_id; + }; + + + class SetViewBlockNumberTransform : public ISimpleTransform + { + public: + explicit SetViewBlockNumberTransform(const Block & header_) + : ISimpleTransform(header_, header_, true) + { + } + + String getName() const override { return "DeduplicationToken::SetViewBlockNumberTransform"; } + + void transform(Chunk & chunk) override; + + private: + size_t block_number = 0; + }; + +} +} diff --git a/src/Processors/Transforms/ExpressionTransform.cpp b/src/Processors/Transforms/ExpressionTransform.cpp index 2fbd2c21b8d..04fabc9a3c6 100644 --- a/src/Processors/Transforms/ExpressionTransform.cpp +++ b/src/Processors/Transforms/ExpressionTransform.cpp @@ -1,5 +1,7 @@ #include #include + + namespace DB { diff --git a/src/Processors/Transforms/JoiningTransform.cpp b/src/Processors/Transforms/JoiningTransform.cpp index 3e2a9462e54..ca204bcb482 100644 --- a/src/Processors/Transforms/JoiningTransform.cpp +++ b/src/Processors/Transforms/JoiningTransform.cpp @@ -365,10 +365,9 @@ IProcessor::Status DelayedJoinedBlocksWorkerTransform::prepare() return Status::Finished; } - if (!data.chunk.hasChunkInfo()) + task = data.chunk.getChunkInfos().get(); + if (!task) throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform must have chunk info"); - - task = std::dynamic_pointer_cast(data.chunk.getChunkInfo()); } else { @@ -479,7 +478,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() if (output.isFinished()) continue; Chunk chunk; - chunk.setChunkInfo(std::make_shared()); + chunk.getChunkInfos().add(std::make_shared()); output.push(std::move(chunk)); output.finish(); } @@ -496,7 +495,7 @@ IProcessor::Status DelayedJoinedBlocksTransform::prepare() { Chunk chunk; auto task = std::make_shared(delayed_blocks, left_delayed_stream_finished_counter); - chunk.setChunkInfo(task); + chunk.getChunkInfos().add(std::move(task)); output.push(std::move(chunk)); } delayed_blocks = nullptr; diff --git a/src/Processors/Transforms/JoiningTransform.h b/src/Processors/Transforms/JoiningTransform.h index a308af03662..5f6d9d6fff2 100644 --- a/src/Processors/Transforms/JoiningTransform.h +++ b/src/Processors/Transforms/JoiningTransform.h @@ -1,6 +1,7 @@ #pragma once #include - +#include +#include namespace DB { @@ -111,11 +112,12 @@ private: }; -class DelayedBlocksTask : public ChunkInfo +class DelayedBlocksTask : public ChunkInfoCloneable { public: DelayedBlocksTask() = default; + DelayedBlocksTask(const DelayedBlocksTask & other) = default; explicit DelayedBlocksTask(IBlocksStreamPtr delayed_blocks_, JoiningTransform::FinishCounterPtr left_delayed_stream_finish_counter_) : delayed_blocks(std::move(delayed_blocks_)) , left_delayed_stream_finish_counter(left_delayed_stream_finish_counter_) diff --git a/src/Processors/Transforms/MaterializingTransform.cpp b/src/Processors/Transforms/MaterializingTransform.cpp index 1eaa5458d37..9ae80e21a68 100644 --- a/src/Processors/Transforms/MaterializingTransform.cpp +++ b/src/Processors/Transforms/MaterializingTransform.cpp @@ -1,6 +1,7 @@ #include #include + namespace DB { diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index 607087fb39c..d7bc320173b 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -150,11 +150,7 @@ private: if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception( ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedForMemoryBoundMergingTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index fc40c6894bb..ea9ebb0f96e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -30,10 +30,10 @@ void GroupingAggregatedTransform::pushData(Chunks chunks, Int32 bucket, bool is_ auto info = std::make_shared(); info->bucket_num = bucket; info->is_overflows = is_overflows; - info->chunks = std::make_unique(std::move(chunks)); + info->chunks = std::make_shared(std::move(chunks)); Chunk chunk; - chunk.setChunkInfo(std::move(info)); + chunk.getChunkInfos().add(std::move(info)); output.push(std::move(chunk)); } @@ -255,11 +255,10 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) if (!chunk.hasRows()) return; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { Int32 bucket = agg_info->bucket_num; bool is_overflows = agg_info->is_overflows; @@ -275,7 +274,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -304,7 +303,11 @@ void GroupingAggregatedTransform::work() Int32 bucket = cur_block.info.bucket_num; auto chunk_info = std::make_shared(); chunk_info->bucket_num = bucket; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); + + auto chunk = Chunk(cur_block.getColumns(), cur_block.rows()); + chunk.getChunkInfos().add(std::move(chunk_info)); + + chunks_map[bucket].emplace_back(std::move(chunk)); } } } @@ -319,9 +322,7 @@ MergingAggregatedBucketTransform::MergingAggregatedBucketTransform( void MergingAggregatedBucketTransform::transform(Chunk & chunk) { - const auto & info = chunk.getChunkInfo(); - const auto * chunks_to_merge = typeid_cast(info.get()); - + auto chunks_to_merge = chunk.getChunkInfos().get(); if (!chunks_to_merge) throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); @@ -330,11 +331,10 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) BlocksList blocks_list; for (auto & cur_chunk : *chunks_to_merge->chunks) { - const auto & cur_info = cur_chunk.getChunkInfo(); - if (!cur_info) + if (cur_chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); - if (const auto * agg_info = typeid_cast(cur_info.get())) + if (auto agg_info = cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = agg_info->is_overflows; @@ -342,7 +342,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (typeid_cast(cur_info.get())) + else if (cur_chunk.getChunkInfos().get()) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; @@ -361,7 +361,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) res_info->is_overflows = chunks_to_merge->is_overflows; res_info->bucket_num = chunks_to_merge->bucket_num; res_info->chunk_num = chunks_to_merge->chunk_num; - chunk.setChunkInfo(std::move(res_info)); + chunk.getChunkInfos().add(std::move(res_info)); auto block = params->aggregator.mergeBlocks(blocks_list, params->final, is_cancelled); @@ -405,11 +405,7 @@ bool SortingAggregatedTransform::tryPushChunk() void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); - - const auto * agg_info = typeid_cast(info.get()); + auto agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in SortingAggregatedTransform."); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h index 77ee3034ffc..3a3c1bd9c1e 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -142,9 +143,9 @@ private: void addChunk(Chunk chunk, size_t from_input); }; -struct ChunksToMerge : public ChunkInfo +struct ChunksToMerge : public ChunkInfoCloneable { - std::unique_ptr chunks; + std::shared_ptr chunks; Int32 bucket_num = -1; bool is_overflows = false; UInt64 chunk_num = 0; // chunk number in order of generation, used during memory bound merging to restore chunks order diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index ad723da7527..446e60a0b81 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -32,11 +32,10 @@ void MergingAggregatedTransform::consume(Chunk chunk) total_input_rows += input_rows; ++total_input_blocks; - const auto & info = chunk.getChunkInfo(); - if (!info) + if (chunk.getChunkInfos().empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); - if (const auto * agg_info = typeid_cast(info.get())) + if (auto agg_info = chunk.getChunkInfos().get()) { /** If the remote servers used a two-level aggregation method, * then blocks will contain information about the number of the bucket. @@ -49,7 +48,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (typeid_cast(info.get())) + else if (chunk.getChunkInfos().get()) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; @@ -89,7 +88,8 @@ Chunk MergingAggregatedTransform::generate() UInt64 num_rows = block.rows(); Chunk chunk(block.getColumns(), num_rows); - chunk.setChunkInfo(std::move(info)); + + chunk.getChunkInfos().add(std::move(info)); return chunk; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 0f433165f14..ee4dfa6a64e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -10,20 +10,20 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : IInflatingTransform(header, header), squashing(header, min_block_size_rows, min_block_size_bytes) + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes) + : IInflatingTransform(header_, header_) + , squashing(header_, min_block_size_rows, min_block_size_bytes) { } void PlanSquashingTransform::consume(Chunk chunk) { - if (Chunk current_chunk = squashing.add(std::move(chunk)); current_chunk.hasChunkInfo()) - squashed_chunk.swap(current_chunk); + squashed_chunk = squashing.add(std::move(chunk)); } Chunk PlanSquashingTransform::generate() { - if (!squashed_chunk.hasChunkInfo()) + if (!squashed_chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't generate chunk in SimpleSquashingChunksTransform"); Chunk result_chunk; @@ -33,12 +33,11 @@ Chunk PlanSquashingTransform::generate() bool PlanSquashingTransform::canGenerate() { - return squashed_chunk.hasChunkInfo(); + return bool(squashed_chunk); } Chunk PlanSquashingTransform::getRemaining() { - Chunk current_chunk = squashing.flush(); - return current_chunk; + return squashing.flush(); } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 4ad2ec2d089..e6db245499e 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,7 +10,7 @@ class PlanSquashingTransform : public IInflatingTransform { public: PlanSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + Block header_, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "PlanSquashingTransform"; } @@ -23,7 +23,6 @@ protected: private: Squashing squashing; Chunk squashed_chunk; - Chunk finish_chunk; }; } diff --git a/src/Processors/Transforms/SelectByIndicesTransform.h b/src/Processors/Transforms/SelectByIndicesTransform.h index 480ab1a0f61..b44f5a3203e 100644 --- a/src/Processors/Transforms/SelectByIndicesTransform.h +++ b/src/Processors/Transforms/SelectByIndicesTransform.h @@ -26,7 +26,7 @@ public: void transform(Chunk & chunk) override { size_t num_rows = chunk.getNumRows(); - const auto * select_final_indices_info = typeid_cast(chunk.getChunkInfo().get()); + auto select_final_indices_info = chunk.getChunkInfos().extract(); if (!select_final_indices_info || !select_final_indices_info->select_final_indices) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk passed to SelectByIndicesTransform without indices column"); @@ -41,7 +41,6 @@ public: chunk.setColumns(std::move(columns), index_column->size()); } - chunk.setChunkInfo(nullptr); } }; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 34b733cde5e..1fb4433240a 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -18,9 +18,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (planned_chunk.hasChunkInfo()) - cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); + cur_chunk = Squashing::squash(squashing.add(std::move(chunk))); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -33,10 +31,7 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - Chunk chunk = squashing.flush(); - if (chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(chunk)); - finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); + finish_chunk = Squashing::squash(squashing.flush()); } void SquashingTransform::work() @@ -49,6 +44,7 @@ void SquashingTransform::work() } ExceptionKeepingTransform::work(); + if (finish_chunk) { data.chunk = std::move(finish_chunk); @@ -67,18 +63,14 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = squashing.add(std::move(chunk)); - if (planned_chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(planned_chunk)); + chunk = Squashing::squash(squashing.add(std::move(chunk))); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = squashing.flush(); - if (chunk.hasChunkInfo()) - chunk = DB::Squashing::squash(std::move(chunk)); + chunk = Squashing::squash(squashing.flush()); } } diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index aa86879e62c..59fceccb538 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -150,11 +150,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) /// Block with values not included in `max_rows_to_group_by`. We'll postpone it. if (overflow_row) { - const auto & info = chunk.getChunkInfo(); - if (!info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); - - const auto * agg_info = typeid_cast(info.get()); + const auto & agg_info = chunk.getChunkInfos().get(); if (!agg_info) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 25fbf13b0e7..312b333ab33 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -5,7 +5,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -16,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -24,9 +27,12 @@ #include #include #include +#include "base/defines.h" +#include #include #include +#include namespace ProfileEvents @@ -105,7 +111,7 @@ private: class ExecutingInnerQueryFromViewTransform final : public ExceptionKeepingTransform { public: - ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_); + ExecutingInnerQueryFromViewTransform(const Block & header, ViewRuntimeData & view_, ViewsDataPtr views_data_, bool disable_deduplication_for_children_); String getName() const override { return "ExecutingInnerQueryFromView"; } @@ -116,6 +122,7 @@ protected: private: ViewsDataPtr views_data; ViewRuntimeData & view; + bool disable_deduplication_for_children; struct State { @@ -138,7 +145,7 @@ class PushingToLiveViewSink final : public SinkToStorage public: PushingToLiveViewSink(const Block & header, StorageLiveView & live_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToLiveViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageLiveView & live_view; @@ -152,7 +159,7 @@ class PushingToWindowViewSink final : public SinkToStorage public: PushingToWindowViewSink(const Block & header, StorageWindowView & window_view_, StoragePtr storage_holder_, ContextPtr context_); String getName() const override { return "PushingToWindowViewSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; private: StorageWindowView & window_view; @@ -216,45 +223,10 @@ std::optional generateViewChain( const auto & insert_settings = insert_context->getSettingsRef(); - // Do not deduplicate insertions into MV if the main insertion is Ok if (disable_deduplication_for_children) { insert_context->setSetting("insert_deduplicate", Field{false}); } - else if (insert_settings.update_insert_deduplication_token_in_dependent_materialized_views && - !insert_settings.insert_deduplication_token.value.empty()) - { - /** Update deduplication token passed to dependent MV with current view id. So it is possible to properly handle - * deduplication in complex INSERT flows. - * - * Example: - * - * landing -┬--> mv_1_1 ---> ds_1_1 ---> mv_2_1 --┬-> ds_2_1 ---> mv_3_1 ---> ds_3_1 - * | | - * └--> mv_1_2 ---> ds_1_2 ---> mv_2_2 --┘ - * - * Here we want to avoid deduplication for two different blocks generated from `mv_2_1` and `mv_2_2` that will - * be inserted into `ds_2_1`. - * - * We are forced to use view id instead of table id because there are some possible INSERT flows where no tables - * are involved. - * - * Example: - * - * landing -┬--> mv_1_1 --┬-> ds_1_1 - * | | - * └--> mv_1_2 --┘ - * - */ - auto insert_deduplication_token = insert_settings.insert_deduplication_token.value; - - if (view_id.hasUUID()) - insert_deduplication_token += "_" + toString(view_id.uuid); - else - insert_deduplication_token += "_" + view_id.getFullNameNotQuoted(); - - insert_context->setSetting("insert_deduplication_token", insert_deduplication_token); - } // Processing of blocks for MVs is done block by block, and there will // be no parallel reading after (plus it is not a costless operation) @@ -361,7 +333,13 @@ std::optional generateViewChain( insert_columns.emplace_back(column.name); } - InterpreterInsertQuery interpreter(nullptr, insert_context, false, false, false); + InterpreterInsertQuery interpreter( + nullptr, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); /// TODO: remove sql_security_type check after we turn `ignore_empty_sql_security_in_create_view_query=false` bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; @@ -378,6 +356,10 @@ std::optional generateViewChain( table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); } +#ifdef ABORT_ON_LOGICAL_ERROR + out.addSource(std::make_shared("Before squashing", out.getInputHeader())); +#endif + auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); counting->setProgressCallback(insert_context->getProgressCallback()); @@ -420,11 +402,19 @@ std::optional generateViewChain( if (type == QueryViewsLogElement::ViewType::MATERIALIZED) { +#ifdef ABORT_ON_LOGICAL_ERROR + out.addSource(std::make_shared("Right after Inner query", out.getInputHeader())); +#endif + auto executing_inner_query = std::make_shared( - storage_header, views_data->views.back(), views_data); + storage_header, views_data->views.back(), views_data, disable_deduplication_for_children); executing_inner_query->setRuntimeData(view_thread_status, view_counter_ms); out.addSource(std::move(executing_inner_query)); + +#ifdef ABORT_ON_LOGICAL_ERROR + out.addSource(std::make_shared("Right before Inner query", out.getInputHeader())); +#endif } return out; @@ -465,11 +455,7 @@ Chain buildPushingToViewsChain( */ result_chain.addTableLock(storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); - /// If the "root" table deduplicates blocks, there are no need to make deduplication for children - /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks - bool disable_deduplication_for_children = false; - if (!context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - disable_deduplication_for_children = !no_destination && storage->supportsDeduplication(); + bool disable_deduplication_for_children = !context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views; auto table_id = storage->getStorageID(); auto views = DatabaseCatalog::instance().getDependentViews(table_id); @@ -560,12 +546,25 @@ Chain buildPushingToViewsChain( auto sink = std::make_shared(live_view_header, *live_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } else if (auto * window_view = dynamic_cast(storage.get())) { auto sink = std::make_shared(window_view->getInputHeader(), *window_view, storage, context); sink->setRuntimeData(thread_status, elapsed_counter_ms); result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); + } + else if (dynamic_cast(storage.get())) + { + auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); + metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); + sink->setRuntimeData(thread_status, elapsed_counter_ms); + result_chain.addSource(std::move(sink)); + + result_chain.addSource(std::make_shared(result_chain.getInputHeader())); } /// Do not push to destination table if the flag is set else if (!no_destination) @@ -573,8 +572,15 @@ Chain buildPushingToViewsChain( auto sink = storage->write(query_ptr, metadata_snapshot, context, async_insert); metadata_snapshot->check(sink->getHeader().getColumnsWithTypeAndName()); sink->setRuntimeData(thread_status, elapsed_counter_ms); + + result_chain.addSource(std::make_shared(sink->getHeader())); + result_chain.addSource(std::move(sink)); } + else + { + result_chain.addSource(std::make_shared(storage_header)); + } if (result_chain.empty()) result_chain.addSink(std::make_shared(storage_header)); @@ -590,7 +596,7 @@ Chain buildPushingToViewsChain( return result_chain; } -static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data) +static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsData & views_data, Chunk::ChunkInfoCollection && chunk_infos, bool disable_deduplication_for_children) { const auto & context = view.context; @@ -637,6 +643,19 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), std::make_shared(std::move(converting)))); + pipeline.addTransform(std::make_shared(std::move(chunk_infos), pipeline.getHeader())); + + if (!disable_deduplication_for_children) + { + String materialize_view_id = view.table_id.hasUUID() ? toString(view.table_id.uuid) : view.table_id.getFullNameNotQuoted(); + pipeline.addTransform(std::make_shared(std::move(materialize_view_id), pipeline.getHeader())); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + else + { + pipeline.addTransform(std::make_shared(pipeline.getHeader())); + } + return QueryPipelineBuilder::getPipeline(std::move(pipeline)); } @@ -728,17 +747,19 @@ IProcessor::Status CopyingDataToViewsTransform::prepare() ExecutingInnerQueryFromViewTransform::ExecutingInnerQueryFromViewTransform( const Block & header, ViewRuntimeData & view_, - std::shared_ptr views_data_) + std::shared_ptr views_data_, + bool disable_deduplication_for_children_) : ExceptionKeepingTransform(header, view_.sample_block) , views_data(std::move(views_data_)) , view(view_) + , disable_deduplication_for_children(disable_deduplication_for_children_) { } void ExecutingInnerQueryFromViewTransform::onConsume(Chunk chunk) { - auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); - state.emplace(process(block, view, *views_data)); + auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); + state.emplace(process(std::move(block), view, *views_data, std::move(chunk.getChunkInfos()), disable_deduplication_for_children)); } @@ -770,10 +791,10 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi { } -void PushingToLiveViewSink::consume(Chunk chunk) +void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(getHeader().cloneWithColumns(chunk.detachColumns()), context); + live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -793,11 +814,11 @@ PushingToWindowViewSink::PushingToWindowViewSink( { } -void PushingToWindowViewSink::consume(Chunk chunk) +void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), context); + window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/src/QueryPipeline/QueryPipelineBuilder.h b/src/QueryPipeline/QueryPipelineBuilder.h index f0b2ead687e..a9e5b1535c0 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.h +++ b/src/QueryPipeline/QueryPipelineBuilder.h @@ -193,7 +193,7 @@ public: return concurrency_control; } - void addResources(QueryPlanResourceHolder resources_) { resources = std::move(resources_); } + void addResources(QueryPlanResourceHolder resources_) { resources.append(std::move(resources_)); } void setQueryIdHolder(std::shared_ptr query_id_holder) { resources.query_id_holders.emplace_back(std::move(query_id_holder)); } void addContext(ContextPtr context) { resources.interpreter_context.emplace_back(std::move(context)); } diff --git a/src/QueryPipeline/QueryPlanResourceHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp index 2cd4dc42a83..bb2be2c8ffb 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -5,7 +5,7 @@ namespace DB { -QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +QueryPlanResourceHolder & QueryPlanResourceHolder::append(QueryPlanResourceHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); @@ -16,6 +16,12 @@ QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHo return *this; } +QueryPlanResourceHolder & QueryPlanResourceHolder::operator=(QueryPlanResourceHolder && rhs) noexcept +{ + append(std::move(rhs)); + return *this; +} + QueryPlanResourceHolder::QueryPlanResourceHolder() = default; QueryPlanResourceHolder::QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept = default; QueryPlanResourceHolder::~QueryPlanResourceHolder() = default; diff --git a/src/QueryPipeline/QueryPlanResourceHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h index ed9eb68b7ba..10f7f39ab09 100644 --- a/src/QueryPipeline/QueryPlanResourceHolder.h +++ b/src/QueryPipeline/QueryPlanResourceHolder.h @@ -20,8 +20,11 @@ struct QueryPlanResourceHolder QueryPlanResourceHolder(QueryPlanResourceHolder &&) noexcept; ~QueryPlanResourceHolder(); + QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &) = delete; + /// Custom move assignment does not destroy data from lhs. It appends data from rhs to lhs. QueryPlanResourceHolder & operator=(QueryPlanResourceHolder &&) noexcept; + QueryPlanResourceHolder & append(QueryPlanResourceHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index ac1423f87c1..fccea9e258e 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -888,12 +888,11 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - squashing.header = state.block_for_insert; - auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - if (planned_chunk.hasChunkInfo()) + squashing.setHeader(state.block_for_insert.cloneEmpty()); + auto result_chunk = Squashing::squash(squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()})); + if (result_chunk) { - Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -902,12 +901,13 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = squashing.flush(); - Chunk result_chunk; - if (planned_chunk.hasChunkInfo()) - result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + Chunk result_chunk = Squashing::squash(squashing.flush()); + if (!result_chunk) + { + return insert_queue.pushQueryWithBlock(state.parsed_query, squashing.getHeader(), query_context); + } - auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.getHeader().cloneWithColumns(result_chunk.detachColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index e556bda2561..8791668cd89 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -134,7 +134,7 @@ DistributedSink::DistributedSink( } -void DistributedSink::consume(Chunk chunk) +void DistributedSink::consume(Chunk & chunk) { if (is_first_chunk) { @@ -142,7 +142,7 @@ void DistributedSink::consume(Chunk chunk) is_first_chunk = false; } - auto ordinary_block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto ordinary_block = getHeader().cloneWithColumns(chunk.getColumns()); if (insert_sync) writeSync(ordinary_block); @@ -420,7 +420,13 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si /// to resolve tables (in InterpreterInsertQuery::getTable()) auto copy_query_ast = query_ast->clone(); - InterpreterInsertQuery interp(copy_query_ast, job.local_context, allow_materialized); + InterpreterInsertQuery interp( + copy_query_ast, + job.local_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); job.pipeline = std::move(block_io.pipeline); @@ -715,7 +721,13 @@ void DistributedSink::writeToLocal(const Cluster::ShardInfo & shard_info, const try { - InterpreterInsertQuery interp(query_ast, context, allow_materialized); + InterpreterInsertQuery interp( + query_ast, + context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interp.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/Distributed/DistributedSink.h b/src/Storages/Distributed/DistributedSink.h index a4c95633595..5b7396f2c6f 100644 --- a/src/Storages/Distributed/DistributedSink.h +++ b/src/Storages/Distributed/DistributedSink.h @@ -49,7 +49,7 @@ public: const Names & columns_to_send_); String getName() const override { return "DistributedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index abd4b4ce23b..0f9bd8b6ff9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -740,7 +740,14 @@ bool StorageFileLog::streamToViews() auto new_context = Context::createCopy(getContext()); - InterpreterInsertQuery interpreter(insert, new_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + new_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); + auto block_io = interpreter.execute(); /// Each stream responsible for closing it's files and store meta diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index f5c5d093ce1..809401bb279 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -1099,7 +1099,13 @@ bool StorageKafka::streamToViews() // Create a stream for each consumer and join them in a union stream // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, kafka_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + kafka_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); // Create a stream for each consumer and join them in a union stream diff --git a/src/Storages/LiveView/LiveViewSink.h b/src/Storages/LiveView/LiveViewSink.h index 792133ced64..9803fa0a160 100644 --- a/src/Storages/LiveView/LiveViewSink.h +++ b/src/Storages/LiveView/LiveViewSink.h @@ -71,9 +71,9 @@ public: new_hash.reset(); } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); block.updateHash(*new_hash); new_blocks->push_back(std::move(block)); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 57a1ea302f9..82759e8a851 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,6 +21,7 @@ limitations under the License. */ #include #include #include +#include #include #include #include @@ -330,7 +331,7 @@ Pipe StorageLiveView::watch( return reader; } -void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) +void StorageLiveView::writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { auto output = std::make_shared(*this); @@ -407,6 +408,21 @@ void StorageLiveView::writeBlock(const Block & block, ContextPtr local_context) builder = interpreter.buildQueryPipeline(); } + builder.addSimpleTransform([&](const Block & cur_header) + { + return std::make_shared(chunk_infos.clone(), cur_header); + }); + + String live_view_id = live_view.getStorageID().hasUUID() ? toString(live_view.getStorageID().uuid) : live_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(live_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + builder.addSimpleTransform([&](const Block & cur_header) { return std::make_shared(cur_header); diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 91daac32c7b..12d8e898347 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -118,7 +118,7 @@ public: return 0; } - void writeBlock(const Block & block, ContextPtr context); + void writeBlock(StorageLiveView & live_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); void refresh(); diff --git a/src/Storages/MaterializedView/RefreshTask.cpp b/src/Storages/MaterializedView/RefreshTask.cpp index bc8cb0ce69a..ff5214a5e51 100644 --- a/src/Storages/MaterializedView/RefreshTask.cpp +++ b/src/Storages/MaterializedView/RefreshTask.cpp @@ -377,7 +377,13 @@ void RefreshTask::executeRefreshUnlocked(std::shared_ptr(task->getInfo().data_part->info.level)); + return ChunkAndProgress{ - .chunk = Chunk(ordered_columns, res.row_count, add_part_level ? std::make_shared(task->getInfo().data_part->info.level) : nullptr), + .chunk = std::move(chunk), .num_read_rows = res.num_read_rows, .num_read_bytes = res.num_read_bytes, .is_finished = false}; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 02f8d6f4f6a..4f90f7131da 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -264,7 +264,10 @@ try ++it; } - return Chunk(std::move(res_columns), rows_read, add_part_level ? std::make_shared(data_part->info.level) : nullptr); + auto result = Chunk(std::move(res_columns), rows_read); + if (add_part_level) + result.getChunkInfos().add(std::make_shared(data_part->info.level)); + return result; } } else diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 05751e0fa6f..d8cfce1ca99 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -1,14 +1,27 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include + +#include namespace ProfileEvents { extern const Event DuplicatedInsertedBlocks; } +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + namespace DB { @@ -58,12 +71,12 @@ void MergeTreeSink::onCancel() { } -void MergeTreeSink::consume(Chunk chunk) +void MergeTreeSink::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(nullptr, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); @@ -76,6 +89,18 @@ void MergeTreeSink::consume(Chunk chunk) size_t streams = 0; bool support_parallel_write = false; + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in MergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + const bool need_to_define_dedup_token = !token_info->isDefined(); + + String block_dedup_token; + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); + for (auto & current_block : part_blocks) { ProfileEvents::Counters part_counters; @@ -100,22 +125,16 @@ void MergeTreeSink::consume(Chunk chunk) if (!temp_part.part) continue; + if (need_to_define_dedup_token) + { + chassert(temp_part.part); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + } + if (!support_parallel_write && temp_part.part->getDataPartStorage().supportParallelWrite()) support_parallel_write = true; - String block_dedup_token; - if (storage.getDeduplicationLog()) - { - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - } - size_t max_insert_delayed_streams_for_parallel_write; if (settings.max_insert_delayed_streams_for_parallel_write.changed) @@ -127,6 +146,7 @@ void MergeTreeSink::consume(Chunk chunk) /// In case of too much columns/parts in block, flush explicitly. streams += temp_part.streams.size(); + if (streams > max_insert_delayed_streams_for_parallel_write) { finishDelayedChunk(); @@ -143,11 +163,16 @@ void MergeTreeSink::consume(Chunk chunk) { .temp_part = std::move(temp_part), .elapsed_ns = elapsed_ns, - .block_dedup_token = std::move(block_dedup_token), + .block_dedup_token = block_dedup_token, .part_counters = std::move(part_counters), }); } + if (need_to_define_dedup_token) + { + token_info->finishChunkHashes(); + } + finishDelayedChunk(); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); @@ -160,6 +185,8 @@ void MergeTreeSink::finishDelayedChunk() if (!delayed_chunk) return; + const Settings & settings = context->getSettingsRef(); + for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -178,7 +205,8 @@ void MergeTreeSink::finishDelayedChunk() storage.fillNewPartName(part, lock); auto * deduplication_log = storage.getDeduplicationLog(); - if (deduplication_log) + + if (settings.insert_deduplicate && deduplication_log) { const String block_id = part->getZeroLevelPartBlockID(partition.block_dedup_token); auto res = deduplication_log->addPart(block_id, part->info); diff --git a/src/Storages/MergeTree/MergeTreeSink.h b/src/Storages/MergeTree/MergeTreeSink.h index cf6715a3415..90976020d52 100644 --- a/src/Storages/MergeTree/MergeTreeSink.h +++ b/src/Storages/MergeTree/MergeTreeSink.h @@ -25,7 +25,7 @@ public: ~MergeTreeSink() override; String getName() const override { return "MergeTreeSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; void onCancel() override; @@ -36,7 +36,6 @@ private: size_t max_parts_per_block; ContextPtr context; StorageSnapshotPtr storage_snapshot; - UInt64 chunk_dedup_seqnum = 0; /// input chunk ordinal number in case of dedup token UInt64 num_blocks_processed = 0; /// We can delay processing for previous chunk and start writing a new one. diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index a552ee89aee..3dbcb5e5bda 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1297,6 +1297,7 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; + Block projection_header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1314,14 +1315,12 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].header = block_to_squash; - Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); + projection_squashes[i].setHeader(block_to_squash.cloneEmpty()); - if (planned_chunk.hasChunkInfo()) + Chunk squashed_chunk = Squashing::squash(projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()})); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); + auto result = projection_squashes[i].getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1342,12 +1341,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squashes[i]; - auto planned_chunk = projection_squash_plan.flush(); - if (planned_chunk.hasChunkInfo()) + auto squashed_chunk = Squashing::squash(projection_squash_plan.flush()); + if (squashed_chunk) { - Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - - auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); + auto result = projection_squash_plan.getHeader().cloneWithColumns(squashed_chunk.detachColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 4b4f4c33e7d..bbae054fbed 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -1,21 +1,25 @@ -#include -#include -#include -#include -#include #include "Common/Exception.h" #include #include #include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include -#include +#include +#include +#include + #include +#include + namespace ProfileEvents { @@ -253,12 +257,12 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const } template -void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) +void ReplicatedMergeTreeSinkImpl::consume(Chunk & chunk) { if (num_blocks_processed > 0) storage.delayInsertOrThrowIfNeeded(&storage.partial_shutdown_event, context, false); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); const auto & settings = context->getSettingsRef(); @@ -284,13 +288,25 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if constexpr (async_insert) { - const auto & chunk_info = chunk.getChunkInfo(); - if (const auto * async_insert_info_ptr = typeid_cast(chunk_info.get())) + const auto async_insert_info_ptr = chunk.getChunkInfos().get(); + if (async_insert_info_ptr) async_insert_info = std::make_shared(async_insert_info_ptr->offsets, async_insert_info_ptr->tokens); else throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } + String block_dedup_token; + auto token_info = chunk.getChunkInfos().get(); + if (!token_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "TokenInfo is expected for consumed chunk in ReplicatedMergeTreeSink for table: {}", + storage.getStorageID().getNameForLogs()); + + const bool need_to_define_dedup_token = !token_info->isDefined(); + + if (token_info->isDefined()) + block_dedup_token = token_info->getToken(); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; @@ -342,23 +358,10 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) } else { - if (deduplicate) { - String block_dedup_token; - /// We add the hash from the data and partition identifier to deduplication ID. /// That is, do not insert the same data to the same partition twice. - - const String & dedup_token = settings.insert_deduplication_token; - if (!dedup_token.empty()) - { - /// multiple blocks can be inserted within the same insert query - /// an ordinal number is added to dedup token to generate a distinctive block id for each block - block_dedup_token = fmt::format("{}_{}", dedup_token, chunk_dedup_seqnum); - ++chunk_dedup_seqnum; - } - block_id = temp_part.part->getZeroLevelPartBlockID(block_dedup_token); LOG_DEBUG(log, "Wrote block with ID '{}', {} rows{}", block_id, current_block.block.rows(), quorumLogMessage(replicas_num)); } @@ -366,6 +369,13 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) { LOG_DEBUG(log, "Wrote block with {} rows{}", current_block.block.rows(), quorumLogMessage(replicas_num)); } + + if (need_to_define_dedup_token) + { + chassert(temp_part.part); + const auto hash_value = temp_part.part->getPartBlockIDHash(); + token_info->addChunkHash(toString(hash_value.items[0]) + "_" + toString(hash_value.items[1])); + } } profile_events_scope.reset(); @@ -411,17 +421,15 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) )); } + if (need_to_define_dedup_token) + { + token_info->finishChunkHashes(); + } + finishDelayedChunk(zookeeper); delayed_chunk = std::make_unique(); delayed_chunk->partitions = std::move(partitions); - /// If deduplicated data should not be inserted into MV, we need to set proper - /// value for `last_block_is_duplicate`, which is possible only after the part is committed. - /// Othervide we can delay commit. - /// TODO: we can also delay commit if there is no MVs. - if (!settings.deduplicate_blocks_in_dependent_materialized_views) - finishDelayedChunk(zookeeper); - ++num_blocks_processed; } @@ -431,8 +439,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF if (!delayed_chunk) return; - last_block_is_duplicate = false; - for (auto & partition : delayed_chunk->partitions) { ProfileEventsScope scoped_attach(&partition.part_counters); @@ -445,8 +451,6 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF { bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; - last_block_is_duplicate = last_block_is_duplicate || deduplicated; - /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -535,7 +539,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl ProfileEventsScope profile_events_scope; String original_part_dir = part->getDataPartStorage().getPartDirectory(); - auto try_rollback_part_rename = [this, &part, &original_part_dir]() + auto try_rollback_part_rename = [this, &part, &original_part_dir] () { if (original_part_dir == part->getDataPartStorage().getPartDirectory()) return; @@ -1151,8 +1155,16 @@ void ReplicatedMergeTreeSinkImpl::onStart() template void ReplicatedMergeTreeSinkImpl::onFinish() { - auto zookeeper = storage.getZooKeeper(); - finishDelayedChunk(std::make_shared(zookeeper)); + const auto & settings = context->getSettingsRef(); + + ZooKeeperWithFaultInjectionPtr zookeeper = ZooKeeperWithFaultInjection::createInstance( + settings.insert_keeper_fault_injection_probability, + settings.insert_keeper_fault_injection_seed, + storage.getZooKeeper(), + "ReplicatedMergeTreeSink::onFinish", + log); + + finishDelayedChunk(zookeeper); } template diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 39623c20584..7d025361717 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -51,7 +51,7 @@ public: ~ReplicatedMergeTreeSinkImpl() override; void onStart() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; String getName() const override { return "ReplicatedMergeTreeSink"; } @@ -59,16 +59,6 @@ public: /// For ATTACHing existing data on filesystem. bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); - /// For proper deduplication in MaterializedViews - bool lastBlockIsDuplicate() const override - { - /// If MV is responsible for deduplication, block is not considered duplicating. - if (context->getSettingsRef().deduplicate_blocks_in_dependent_materialized_views) - return false; - - return last_block_is_duplicate; - } - struct DelayedChunk; private: std::vector detectConflictsInAsyncBlockIDs(const std::vector & ids); @@ -126,7 +116,6 @@ private: bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; - bool last_block_is_duplicate = false; UInt64 num_blocks_processed = 0; LoggerPtr log; diff --git a/src/Storages/MessageQueueSink.cpp b/src/Storages/MessageQueueSink.cpp index 4fb81d69070..36899011e33 100644 --- a/src/Storages/MessageQueueSink.cpp +++ b/src/Storages/MessageQueueSink.cpp @@ -40,7 +40,7 @@ void MessageQueueSink::onFinish() producer->finish(); } -void MessageQueueSink::consume(Chunk chunk) +void MessageQueueSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); if (columns.empty()) diff --git a/src/Storages/MessageQueueSink.h b/src/Storages/MessageQueueSink.h index b3c1e61734f..4a9248c6c4d 100644 --- a/src/Storages/MessageQueueSink.h +++ b/src/Storages/MessageQueueSink.h @@ -35,7 +35,7 @@ public: String getName() const override { return storage_name + "Sink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onStart() override; void onFinish() override; diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index 0b88a9e8929..8f0e2d76473 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -644,7 +644,13 @@ bool StorageNATS::streamToViews() insert->table_id = table_id; // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, nats_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + nats_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp index f2f6eac333c..d2bdd0af302 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.cpp @@ -39,12 +39,12 @@ StorageObjectStorageSink::StorageObjectStorageSink( configuration->format, *write_buf, sample_block, context, format_settings_); } -void StorageObjectStorageSink::consume(Chunk chunk) +void StorageObjectStorageSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageObjectStorageSink::onCancel() diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSink.h b/src/Storages/ObjectStorage/StorageObjectStorageSink.h index e0081193686..6ab531bb21a 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSink.h +++ b/src/Storages/ObjectStorage/StorageObjectStorageSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "StorageObjectStorageSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 4388864434e..14b828e7268 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -454,7 +454,13 @@ bool StorageObjectStorageQueue::streamToViews() while (!shutdown_called && !file_iterator->isFinished()) { - InterpreterInsertQuery interpreter(insert, queue_context, false, true, true); + InterpreterInsertQuery interpreter( + insert, + queue_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto read_from_format_info = prepareReadingFromFormat( block_io.pipeline.getHeader().getNames(), diff --git a/src/Storages/PartitionedSink.cpp b/src/Storages/PartitionedSink.cpp index 09b009b26d8..ee2570756ed 100644 --- a/src/Storages/PartitionedSink.cpp +++ b/src/Storages/PartitionedSink.cpp @@ -51,7 +51,7 @@ SinkPtr PartitionedSink::getSinkForPartitionKey(StringRef partition_key) return it->second; } -void PartitionedSink::consume(Chunk chunk) +void PartitionedSink::consume(Chunk & chunk) { const auto & columns = chunk.getColumns(); @@ -104,7 +104,7 @@ void PartitionedSink::consume(Chunk chunk) for (const auto & [partition_key, partition_index] : partition_id_to_chunk_index) { auto sink = getSinkForPartitionKey(partition_key); - sink->consume(std::move(partition_index_to_chunk[partition_index])); + sink->consume(partition_index_to_chunk[partition_index]); } } diff --git a/src/Storages/PartitionedSink.h b/src/Storages/PartitionedSink.h index 68edeb6fd73..fcd67556dc9 100644 --- a/src/Storages/PartitionedSink.h +++ b/src/Storages/PartitionedSink.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartitionedSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onException(std::exception_ptr exception) override; diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index ba3cc6f58d0..44479bd01e2 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -697,7 +697,13 @@ void MaterializedPostgreSQLConsumer::syncTables() insert->table_id = storage->getStorageID(); insert->columns = std::make_shared(buffer->columns_ast); - InterpreterInsertQuery interpreter(insert, insert_context, true); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ true, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto io = interpreter.execute(); auto input = std::make_shared( result_rows.cloneEmpty(), Chunk(result_rows.getColumns(), result_rows.rows())); diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index 2bb1e2dde0d..f632e553a0d 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -437,7 +437,13 @@ StorageInfo PostgreSQLReplicationHandler::loadFromSnapshot(postgres::Connection auto insert_context = materialized_storage->getNestedTableContext(); - InterpreterInsertQuery interpreter(insert, insert_context); + InterpreterInsertQuery interpreter( + insert, + insert_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); const StorageInMemoryMetadata & storage_metadata = nested_storage->getInMemoryMetadata(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index e4b19992151..f3d2aff68c8 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -1129,7 +1129,13 @@ bool StorageRabbitMQ::tryStreamToViews() } // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); + InterpreterInsertQuery interpreter( + insert, + rabbitmq_context, + /* allow_materialized */ false, + /* no_squash */ true, + /* no_destination */ true, + /* async_isnert */ false); auto block_io = interpreter.execute(); block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp index 90792c59d38..4b5188ca9f2 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.cpp @@ -218,7 +218,7 @@ std::pair EmbeddedRocksDBBulkSink::seriali return {std::move(serialized_key_column), std::move(serialized_value_column)}; } -void EmbeddedRocksDBBulkSink::consume(Chunk chunk_) +void EmbeddedRocksDBBulkSink::consume(Chunk & chunk_) { std::vector chunks_to_write = squash(std::move(chunk_)); @@ -247,7 +247,10 @@ void EmbeddedRocksDBBulkSink::onFinish() { /// If there is any data left, write it. if (!chunks.empty()) - consume({}); + { + Chunk empty; + consume(empty); + } } String EmbeddedRocksDBBulkSink::getTemporarySSTFilePath() diff --git a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h index 1f548e7813d..64190c8c86f 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBBulkSink.h @@ -32,7 +32,7 @@ public: ~EmbeddedRocksDBBulkSink() override; - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index c451cfd1bf5..1f7f6939f40 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -29,7 +29,7 @@ EmbeddedRocksDBSink::EmbeddedRocksDBSink( serializations = getHeader().getSerializations(); } -void EmbeddedRocksDBSink::consume(Chunk chunk) +void EmbeddedRocksDBSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.h b/src/Storages/RocksDB/EmbeddedRocksDBSink.h index 011322df829..2e1e0c7b429 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.h +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.h @@ -17,7 +17,7 @@ public: StorageEmbeddedRocksDB & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "EmbeddedRocksDBSink"; } private: diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index b9d3e071b6c..3473166a080 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -313,7 +313,8 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index a3f6b6afc5d..b064fba223a 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -607,7 +607,7 @@ public: String getName() const override { return "BufferSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { size_t rows = chunk.getNumRows(); if (!rows) @@ -1020,7 +1020,13 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl auto insert_context = Context::createCopy(getContext()); insert_context->makeQueryContext(); - InterpreterInsertQuery interpreter{insert, insert_context, allow_materialized}; + InterpreterInsertQuery interpreter( + insert, + insert_context, + allow_materialized, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); PushingPipelineExecutor executor(block_io.pipeline); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 849fa5dbe0b..67586985ce8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -1050,7 +1050,13 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, query_context); + InterpreterInsertQuery interpreter( + new_query, + query_context, + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); pipeline.addCompletedPipeline(interpreter.execute().pipeline); } else diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 7f39ff615f0..3fb397c7b81 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1778,12 +1778,12 @@ public: String getName() const override { return "StorageFileSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { std::lock_guard cancel_lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onCancel() override diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index 20f99070000..c80e799a92b 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -119,10 +119,10 @@ public: std::string getName() const override { return "StorageKeeperMapSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -1248,7 +1248,10 @@ void StorageKeeperMap::mutate(const MutationCommands & commands, ContextPtr loca Block block; while (executor.pull(block)) - sink->consume(Chunk{block.getColumns(), block.rows()}); + { + auto chunk = Chunk(block.getColumns(), block.rows()); + sink->consume(chunk); + } sink->finalize(strict); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index de0324d7998..463694c63aa 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -21,7 +22,6 @@ #include #include -#include "StorageLogSettings.h" #include #include #include @@ -341,7 +341,7 @@ public: } } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -398,9 +398,9 @@ private: }; -void LogSink::consume(Chunk chunk) +void LogSink::consume(Chunk & chunk) { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); metadata_snapshot->check(block, true); for (auto & stream : streams | boost::adaptors::map_values) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f69c4adb552..b1bd7053c2e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -63,7 +63,7 @@ public: String getName() const override { return "MemorySink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); storage_snapshot->metadata->check(block, true); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 62a2a048642..e0818fafae9 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -17,7 +17,6 @@ #include #include #include -#include #include @@ -107,12 +106,12 @@ public: String getName() const override { return "StorageMongoDBSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { Poco::MongoDB::Database db(db_name); Poco::MongoDB::Document::Vector documents; - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t num_rows = block.rows(); size_t num_cols = block.columns(); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index da391909dff..2a8a7bd2ee7 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -151,9 +151,9 @@ public: String getName() const override { return "StorageMySQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); auto blocks = splitBlocks(block, max_batch_rows); mysqlxx::Transaction trans(entry); try diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index a8713c61e4d..cdfeab62b58 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -227,9 +227,9 @@ public: String getName() const override { return "PostgreSQLSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); if (!inserter) { if (on_conflict.empty()) diff --git a/src/Storages/StorageRedis.cpp b/src/Storages/StorageRedis.cpp index 83bb3c606c9..1a275320f43 100644 --- a/src/Storages/StorageRedis.cpp +++ b/src/Storages/StorageRedis.cpp @@ -147,7 +147,7 @@ class RedisSink : public SinkToStorage public: RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadata_snapshot_); - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; String getName() const override { return "RedisSink"; } private: @@ -169,10 +169,10 @@ RedisSink::RedisSink(StorageRedis & storage_, const StorageMetadataPtr & metadat } } -void RedisSink::consume(Chunk chunk) +void RedisSink::consume(Chunk & chunk) { auto rows = chunk.getNumRows(); - auto block = getHeader().cloneWithColumns(chunk.detachColumns()); + auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString wb_key; WriteBufferFromOwnString wb_value; @@ -567,7 +567,8 @@ void StorageRedis::mutate(const MutationCommands & commands, ContextPtr context_ Block block; while (executor.pull(block)) { - sink->consume(Chunk{block.getColumns(), block.rows()}); + Chunk chunk(block.getColumns(), block.rows()); + sink->consume(chunk); } } diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 179e4cee199..85417a2f2a4 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -141,7 +141,7 @@ public: String getName() const override { return "SQLiteSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); WriteBufferFromOwnString sqlbuf; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 5b7f9fc0ac2..0d094c15880 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -44,7 +44,7 @@ public: const String & backup_file_name_, bool persistent_); String getName() const override { return "SetOrJoinSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onFinish() override; private: @@ -82,9 +82,9 @@ SetOrJoinSink::SetOrJoinSink( { } -void SetOrJoinSink::consume(Chunk chunk) +void SetOrJoinSink::consume(Chunk & chunk) { - Block block = getHeader().cloneWithColumns(chunk.detachColumns()); + Block block = getHeader().cloneWithColumns(chunk.getColumns()); table.insertBlock(block, getContext()); if (persistent) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 8df87d6290f..9b6d9f041e1 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -226,9 +226,9 @@ public: } } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { - block_out->write(getHeader().cloneWithColumns(chunk.detachColumns())); + block_out->write(getHeader().cloneWithColumns(chunk.getColumns())); } void onFinish() override diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 895da028fc2..90e05c44e31 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -565,12 +565,12 @@ StorageURLSink::StorageURLSink( } -void StorageURLSink::consume(Chunk chunk) +void StorageURLSink::consume(Chunk & chunk) { std::lock_guard lock(cancel_mutex); if (cancelled) return; - writer->write(getHeader().cloneWithColumns(chunk.detachColumns())); + writer->write(getHeader().cloneWithColumns(chunk.getColumns())); } void StorageURLSink::onCancel() diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index fa7cc6eeeef..1804079e75f 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -251,7 +251,7 @@ public: const String & method = Poco::Net::HTTPRequest::HTTP_POST); std::string getName() const override { return "StorageURLSink"; } - void consume(Chunk chunk) override; + void consume(Chunk & chunk) override; void onCancel() override; void onException(std::exception_ptr exception) override; void onFinish() override; diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index cb46cd19517..c9c606de049 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -119,7 +119,7 @@ public: ZooKeeperSink(const Block & header, ContextPtr context) : SinkToStorage(header), zookeeper(context->getZooKeeper()) { } String getName() const override { return "ZooKeeperSink"; } - void consume(Chunk chunk) override + void consume(Chunk & chunk) override { auto block = getHeader().cloneWithColumns(chunk.getColumns()); size_t rows = block.rows(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 77e6ee9cb24..e36247103c7 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -304,7 +305,7 @@ namespace public: explicit AddingAggregatedChunkInfoTransform(Block header) : ISimpleTransform(header, header, false) { } - void transform(Chunk & chunk) override { chunk.setChunkInfo(std::make_shared()); } + void transform(Chunk & chunk) override { chunk.getChunkInfos().add(std::make_shared()); } String getName() const override { return "AddingAggregatedChunkInfoTransform"; } }; @@ -689,7 +690,13 @@ inline void StorageWindowView::fire(UInt32 watermark) StoragePtr target_table = getTargetTable(); auto insert = std::make_shared(); insert->table_id = target_table->getStorageID(); - InterpreterInsertQuery interpreter(insert, getContext()); + InterpreterInsertQuery interpreter( + insert, + getContext(), + /* allow_materialized */ false, + /* no_squash */ false, + /* no_destination */ false, + /* async_isnert */ false); auto block_io = interpreter.execute(); auto pipe = Pipe(std::make_shared(blocks, header)); @@ -1413,7 +1420,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) } void StorageWindowView::writeIntoWindowView( - StorageWindowView & window_view, const Block & block, ContextPtr local_context) + StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr local_context) { window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) @@ -1428,7 +1435,7 @@ void StorageWindowView::writeIntoWindowView( window_view.max_watermark = window_view.getWindowUpperBound(first_record_timestamp); } - Pipe pipe(std::make_shared(block.cloneEmpty(), Chunk(block.getColumns(), block.rows()))); + Pipe pipe(std::make_shared(block)); UInt32 lateness_bound = 0; UInt32 t_max_watermark = 0; @@ -1473,10 +1480,10 @@ void StorageWindowView::writeIntoWindowView( auto syntax_result = TreeRewriter(local_context).analyze(query, columns); auto filter_expression = ExpressionAnalyzer(filter_function, syntax_result, local_context).getActionsDAG(false); - pipe.addSimpleTransform([&](const Block & header) + pipe.addSimpleTransform([&](const Block & header_) { return std::make_shared( - header, std::make_shared(filter_expression), + header_, std::make_shared(filter_expression), filter_function->getColumnName(), true); }); } @@ -1531,6 +1538,30 @@ void StorageWindowView::writeIntoWindowView( QueryProcessingStage::WithMergeableState); builder = select_block.buildQueryPipeline(); + + builder.addSimpleTransform([&](const Block & stream_header) + { + // Can't move chunk_infos here, that function could be called several times + return std::make_shared(chunk_infos.clone(), stream_header); + }); + + String window_view_id = window_view.getStorageID().hasUUID() ? toString(window_view.getStorageID().uuid) : window_view.getStorageID().getFullNameNotQuoted(); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(window_view_id, stream_header); + }); + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared(stream_header); + }); + +#ifdef ABORT_ON_LOGICAL_ERROR + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer tmp table before squashing", stream_header); + }); +#endif + builder.addSimpleTransform([&](const Block & current_header) { return std::make_shared( @@ -1570,6 +1601,13 @@ void StorageWindowView::writeIntoWindowView( lateness_upper_bound); }); +#ifdef ABORT_ON_LOGICAL_ERROR + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Afrer WatermarkTransform", stream_header); + }); +#endif + auto inner_table = window_view.getInnerTable(); auto lock = inner_table->lockForShare( local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); @@ -1586,9 +1624,16 @@ void StorageWindowView::writeIntoWindowView( auto convert_actions = std::make_shared( convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); - builder.addSimpleTransform([&](const Block & header) { return std::make_shared(header, convert_actions); }); + builder.addSimpleTransform([&](const Block & header_) { return std::make_shared(header_, convert_actions); }); } +#ifdef ABORT_ON_LOGICAL_ERROR + builder.addSimpleTransform([&](const Block & stream_header) + { + return std::make_shared("StorageWindowView: Before out", stream_header); + }); +#endif + builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index f79867df424..14ac65091d3 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -166,7 +166,7 @@ public: BlockIO populate(); - static void writeIntoWindowView(StorageWindowView & window_view, const Block & block, ContextPtr context); + static void writeIntoWindowView(StorageWindowView & window_view, Block && block, Chunk::ChunkInfoCollection && chunk_infos, ContextPtr context); ASTPtr getMergeableQuery() const { return mergeable_query->clone(); } diff --git a/tests/integration/test_force_deduplication/test.py b/tests/integration/test_force_deduplication/test.py index 87b2c45bbc5..14c11bc8500 100644 --- a/tests/integration/test_force_deduplication/test.py +++ b/tests/integration/test_force_deduplication/test.py @@ -29,6 +29,8 @@ def get_counts(): def test_basic(start_cluster): + old_src, old_a, old_b, old_c = 0, 0, 0, 0 + node.query( """ CREATE TABLE test (A Int64) ENGINE = ReplicatedMergeTree ('/clickhouse/test/tables/test','1') ORDER BY tuple(); @@ -39,6 +41,15 @@ def test_basic(start_cluster): INSERT INTO test values(999); """ ) + + src, a, b, c = get_counts() + assert src == old_src + 1 + assert a == old_a + 2 + assert b == old_b + 2 + assert c == old_c + 2 + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A with pytest.raises(QueryRuntimeException): node.query( """ @@ -46,22 +57,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c - old_src, old_a, old_b, old_c = get_counts() - # number of rows in test_mv_a and test_mv_c depends on order of inserts into views - assert old_src == 11 - assert old_a in (1, 11) - assert old_b == 1 - assert old_c in (1, 11) - + # deduplication only for src table node.query("INSERT INTO test SELECT number FROM numbers(10)") src, a, b, c = get_counts() - # no changes because of deduplication in source table assert src == old_src - assert a == old_a - assert b == old_b - assert c == old_c + assert a == old_a + 10 + assert b == old_b + 10 + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for MV tables does not work, because previous inserts have not written their deduplications tokens to the log due to `deduplicate_blocks_in_dependent_materialized_views = 0`. node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; @@ -69,11 +81,27 @@ def test_basic(start_cluster): """ ) src, a, b, c = get_counts() - assert src == 11 - assert a == old_a + 10 # first insert could be succesfull with disabled dedup - assert b == 11 + assert src == old_src + assert a == old_a + 10 + assert b == old_b + 10 assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all the tables + node.query( + """ + SET deduplicate_blocks_in_dependent_materialized_views = 1; + INSERT INTO test SELECT number FROM numbers(10); + """ + ) + src, a, b, c = get_counts() + assert src == old_src + assert a == old_a + assert b == old_b + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c + + # that issert fails on test_mv_b due to partitions by A, it is an uniq data which is not deduplicated with pytest.raises(QueryRuntimeException): node.query( """ @@ -82,16 +110,23 @@ def test_basic(start_cluster): INSERT INTO test SELECT number FROM numbers(100,10); """ ) + src, a, b, c = get_counts() + assert src == old_src + 10 + assert a == old_a + 10 + assert b == old_b + assert c == old_c + 10 + old_src, old_a, old_b, old_c = src, a, b, c + # deduplication for all tables, except test_mv_b. For test_mv_b it is an uniq data which is not deduplicated due to exception at previous insert node.query( """ SET deduplicate_blocks_in_dependent_materialized_views = 1; INSERT INTO test SELECT number FROM numbers(100,10); """ ) - src, a, b, c = get_counts() - assert src == 21 - assert a == old_a + 20 - assert b == 21 - assert c == old_c + 20 + assert src == old_src + assert a == old_a + assert b == old_b + 10 + assert c == old_c + old_src, old_a, old_b, old_c = src, a, b, c diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference index adf6abb7298..9c9281dc7e4 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.reference @@ -1,7 +1,7 @@ 2 3 -2 +3 3 1 diff --git a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql index d3c4da86b41..51e6a513608 100644 --- a/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/00510_materizlized_view_and_deduplication_zookeeper.sql @@ -29,7 +29,7 @@ INSERT INTO without_deduplication VALUES (43); SELECT count() FROM with_deduplication; SELECT count() FROM without_deduplication; --- Implicit insert isn't deduplicated +-- Implicit insert isn't deduplicated, because deduplicate_blocks_in_dependent_materialized_views = 0 by default SELECT ''; SELECT countMerge(cnt) FROM with_deduplication_mv; SELECT countMerge(cnt) FROM without_deduplication_mv; diff --git a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh index 1fb219108da..8f7d19028b0 100755 --- a/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh +++ b/tests/queries/0_stateless/00633_materialized_view_and_too_many_parts_zookeeper.sh @@ -36,8 +36,8 @@ ${CLICKHOUSE_CLIENT} --query "DROP TABLE c" echo ${CLICKHOUSE_CLIENT} --query "CREATE TABLE root (d UInt64) ENGINE = Null" ${CLICKHOUSE_CLIENT} --query "CREATE MATERIALIZED VIEW d (d UInt64) ENGINE = ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/d', '1') ORDER BY d AS SELECT * FROM root" -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; -${CLICKHOUSE_CLIENT} --query "INSERT INTO root VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; +${CLICKHOUSE_CLIENT} --query "INSERT INTO root SETTINGS deduplicate_blocks_in_dependent_materialized_views=1 VALUES (1)"; ${CLICKHOUSE_CLIENT} --query "SELECT * FROM d"; ${CLICKHOUSE_CLIENT} --query "DROP TABLE root" ${CLICKHOUSE_CLIENT} --query "DROP TABLE d" diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a9801e3b910..dadf2f35e6e 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -137,7 +137,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 60 select count() from testXA; @@ -185,7 +185,7 @@ select arrayUniq(thread_ids) from system.query_log where Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '16'; -5 +18 select count() from testX; 80 select count() from testXA; diff --git a/tests/queries/0_stateless/01927_query_views_log_current_database.sql b/tests/queries/0_stateless/01927_query_views_log_current_database.sql index ba42795333c..6287156daaf 100644 --- a/tests/queries/0_stateless/01927_query_views_log_current_database.sql +++ b/tests/queries/0_stateless/01927_query_views_log_current_database.sql @@ -16,6 +16,7 @@ CREATE MATERIALIZED VIEW matview_b_to_c TO table_c AS SELECT SUM(a + sleepEachRo CREATE MATERIALIZED VIEW matview_join_d_e TO table_f AS SELECT table_d.a as a, table_e.count + sleepEachRow(0.000003) as count FROM table_d LEFT JOIN table_e ON table_d.a = table_e.a; -- ENABLE LOGS +SET parallel_view_processing=0; SET log_query_views=1; SET log_queries_min_type='QUERY_FINISH'; SET log_queries=1; diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference index e0cc8f0ce63..2d9f236ada9 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.reference @@ -1,8 +1,8 @@ -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent -18 9 9 9 -deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent -18 18 9 18 -deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data +18 18 18 18 +deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0 +18 36 27 36 +deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data 18 18 18 18 diff --git a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql index fdd75b91b1f..465c8d6136c 100644 --- a/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql +++ b/tests/queries/0_stateless/02124_insert_deduplication_token_materialized_views.sql @@ -1,6 +1,6 @@ -- Tags: long -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = no, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; drop table if exists test sync; drop table if exists test_mv_a sync; @@ -35,7 +35,7 @@ select (select sum(c) from test_mv_c where test='case1'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = no, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; @@ -53,7 +53,7 @@ select (select sum(c) from test_mv_c where test='case2'); -select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results inconsitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=0, insert_deduplication_token = yes, results: test_mv_a and test_mv_c have all data, test_mv_b has data obly with max_partitions_per_insert_block=0'; set deduplicate_blocks_in_dependent_materialized_views=0; @@ -70,7 +70,7 @@ select (select sum(c) from test_mv_b where test='case3'), (select sum(c) from test_mv_c where test='case3'); -select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results consitent'; +select 'deduplicate_blocks_in_dependent_materialized_views=1, insert_deduplication_token = yes, results: all tables have deduplicated data'; set deduplicate_blocks_in_dependent_materialized_views=1; diff --git a/tests/queries/0_stateless/02125_query_views_log.sql b/tests/queries/0_stateless/02125_query_views_log.sql index d2d19b76a1f..ba50902ebea 100644 --- a/tests/queries/0_stateless/02125_query_views_log.sql +++ b/tests/queries/0_stateless/02125_query_views_log.sql @@ -8,7 +8,7 @@ create table dst (key Int) engine=Null(); create materialized view mv1 to dst as select * from src; create materialized view mv2 to dst as select * from src; -insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=1; +insert into src select * from numbers(1e6) settings log_queries=1, max_untracked_memory=0, parallel_view_processing=0; system flush logs; -- { echo } diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference index 335b55f05c8..07deb7c2565 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.reference @@ -10,13 +10,14 @@ 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 +2023-09-01 12:00:00 42 -- Original issue with deduplicate_blocks_in_dependent_materialized_views = 1 AND max_insert_delayed_streams_for_parallel_write > 1 -- Landing 2022-09-01 12:23:34 42 2023-09-01 12:23:34 42 -- MV -2022-09-01 12:00:00 42 +2022-09-01 12:00:00 84 2023-09-01 12:00:00 42 -- Regression introduced in https://github.com/ClickHouse/ClickHouse/pull/54184 -- Landing (Agg/Replacing)MergeTree diff --git a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql index f206f0d7775..a2378fd8f67 100644 --- a/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql +++ b/tests/queries/0_stateless/02912_ingestion_mv_deduplication.sql @@ -54,8 +54,9 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view - 1st insert works for landing and mv tables - 2nd insert gets first block 20220901 deduplicated and second one inserted in landing table - - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded + - 2nd insert is not inserting anything in mv table due to a bug computing blocks to be discarded, now that block is inserted because deduplicate_blocks_in_dependent_materialized_views=0 + Now it is fixed. */ SET deduplicate_blocks_in_dependent_materialized_views = 0, max_insert_delayed_streams_for_parallel_write = 1000; @@ -97,7 +98,7 @@ SELECT '-- Original issue with deduplicate_blocks_in_dependent_materialized_view This is what happens now: - 1st insert works for landing and mv tables - - 2nd insert gets first block 20220901 deduplicated and second one inserted for landing and mv tables + - 2nd insert gets first block 20220901 deduplicated for landing and both rows are inserted for mv tables */ SET deduplicate_blocks_in_dependent_materialized_views = 1, max_insert_delayed_streams_for_parallel_write = 1000; diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python new file mode 100644 index 00000000000..dd1058518c9 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -0,0 +1,657 @@ +#!/usr/bin/env python3 + +import os +import sys +import argparse +import string + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) + + +def __format(template, **params): + field_names = [v[1] for v in string.Formatter().parse(template) if v[1] is not None] + kv_args = {} + for field in field_names: + if field in params: + kv_args[field] = params[field] + else: + kv_args[field] = "" + + return template.format(**kv_args) + + +def instance_create_statement( + table_name, + table_columns, + table_keys, + table_engine, + with_deduplication, + no_merges=True, +): + template = """ + CREATE TABLE {table_name} + {table_columns} + ENGINE = {table_engine} + ORDER BY {table_keys} + {table_settings}; + {table_no_merges} + """ + + params = dict() + params["table_name"] = table_name + params["table_columns"] = table_columns + params["table_keys"] = table_keys + params["table_no_merges"] = f"SYSTEM STOP MERGES {table_name};" if no_merges else "" + params["table_engine"] = ( + "MergeTree()" + if table_engine == "MergeTree" + else f"ReplicatedMergeTree('/clickhouse/tables/{{database}}/{table_name}', '1')" + ) + + deduplication_window_setting_name = ( + "non_replicated_deduplication_window" + if table_engine == "MergeTree" + else "replicated_deduplication_window" + ) + deduplication_window_setting_value = 1000 if with_deduplication else 0 + + settings = list() + settings += [ + f"{deduplication_window_setting_name}={deduplication_window_setting_value}" + ] + params["table_settings"] = "SETTINGS " + ",".join(settings) + + return __format(template, **params) + + +def instance_insert_statement( + table_name, count, insert_method, insert_unique_blocks, use_insert_token +): + insert_settings = ( + "" if not use_insert_token else "SETTINGS insert_deduplication_token='UDT'" + ) + + if insert_method == "InsertSelect": + template = """ + INSERT INTO {table_name} + SELECT {insert_columns} + FROM numbers({count}) {insert_settings}; + """ + return __format( + template, + table_name=table_name, + count=count, + insert_columns="'src_4', 4" + if not insert_unique_blocks + else "'src_' || toString(number), number", + insert_settings=insert_settings, + ) + + else: + template = """ + INSERT INTO {table_name} + {insert_settings} VALUES {insert_values}; + """ + + values = [] + for i in range(count): + values += ( + [f"('src_{i}', {i})"] if insert_unique_blocks else ["('src_4', 4)"] + ) + insert_values = ", ".join(values) + + return __format( + template, + table_name=table_name, + insert_settings=insert_settings, + insert_values=insert_values, + ) + + +def get_drop_tables_statements(tables): + return "".join( + [f"DROP TABLE IF EXISTS {table_name};\n" for table_name in tables[::-1]] + ) + + +def get_logs_statement(args): + if args.get_logs: + return "SET send_logs_level='test';" + return "" + + +def str2bool(v): + if isinstance(v, bool): + return v + if v.lower() in ("yes", "true", "t", "y", "1"): + return True + elif v.lower() in ("no", "false", "f", "n", "0"): + return False + else: + raise argparse.ArgumentTypeError("Boolean value expected.") + + +class ArgsFactory: + def __init__(self, parser): + self.__parser = parser + + def add_opt_engine(self): + self.__parser.add_argument( + "--table-engine", + choices=["ReplicatedMergeTree", "MergeTree"], + default="MergeTree", + ) + + def add_opt_user_token(self): + self.__parser.add_argument( + "--use-insert-token", type=str2bool, nargs="?", const=True, default=False + ) + + def add_opt_single_thread(self): + self.__parser.add_argument( + "--single-thread", type=str2bool, nargs="?", const=True, default=True + ) + + def add_opt_dedup_src(self): + self.__parser.add_argument( + "--deduplicate-src-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) + + def add_opt_dedup_dst(self): + self.__parser.add_argument( + "--deduplicate-dst-table", + type=str2bool, + nargs="?", + const=True, + default=True, + ) + + def add_opt_get_logs(self): + self.__parser.add_argument( + "--get-logs", type=str2bool, nargs="?", const=True, default=False + ) + + def add_opt_uniq_blocks(self): + self.__parser.add_argument( + "--insert-unique-blocks", type=str2bool, nargs="?", const=True, default=True + ) + + def add_opt_insert_method(self): + self.__parser.add_argument( + "--insert-method", + choices=["InsertSelect", "InsertValues"], + default="InsertSelect", + ) + + def add_all(self): + self.add_opt_engine() + self.add_opt_user_token() + self.add_opt_single_thread() + self.add_opt_dedup_src() + self.add_opt_dedup_dst() + self.add_opt_get_logs() + self.add_opt_insert_method() + self.add_opt_uniq_blocks() + + +def test_insert_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_statement = instance_create_statement( + table_name="table_when_b_even", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + create_mv_statement = """ + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even + AS + SELECT a, b + FROM table_a_b + WHERE b % 2 = 0; + """ + + drop_tables_statements = get_drop_tables_statements( + ["table_a_b", "table_when_b_even", "mv_b_even"] + ) + + insert_statement = instance_insert_statement( + "table_a_b", + 10, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, + ) + + print_details_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + {"" if not args.get_logs else "SELECT _part, count() FROM table_a_b GROUP BY _part ORDER BY _part;"} + + SELECT 'table_when_b_even'; + SELECT 'count', count() FROM table_when_b_even; + {"" if not args.get_logs else "SELECT _part, count() FROM table_when_b_even GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 5 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {5 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = """ + SELECT throwIf( count() != 10 ) + FROM table_a_b; + SELECT throwIf( count() != 10 ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {10 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 20} ) + FROM table_a_b; + SELECT throwIf( count() != {1 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_a_b_statement} + + {create_table_when_b_even_statement} + + {create_mv_statement} + + -- first insert + {insert_statement} + + {print_details_statements} + + {assert_first_insert_statements} + + -- second insert, it is retry + {insert_statement} + + {print_details_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_mv_generates_several_blocks(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = [ + "table_for_join_with", + "table_a_b", + "table_when_b_even_and_joined", + "mv_b_even", + ] + drop_tables_statements = get_drop_tables_statements(tables) + + details_print_for_table_for_join_with = "" + if args.get_logs: + details_print_for_table_for_join_with = """ + SELECT 'table_for_join_with'; + SELECT a_join, b, _part FROM table_for_join_with ORDER BY _part, a_join, b; + """ + + create_table_a_b_statement = instance_create_statement( + table_name="table_a_b", + table_columns="(a_src String, b UInt64)", + table_keys="(a_src, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_when_b_even_and_joined_statement = instance_create_statement( + table_name="table_when_b_even_and_joined", + table_columns="(a_src String, a_join String, b UInt64)", + table_keys="(a_src, a_join, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_a_b", + 5, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, + ) + + details_print_statements = f""" + SELECT 'table_a_b'; + SELECT 'count', count() FROM table_a_b; + + SELECT 'table_when_b_even_and_joined'; + SELECT 'count', count() FROM table_when_b_even_and_joined; + {"" if not args.get_logs else "SELECT _part, a_src, a_join, b FROM table_when_b_even_and_joined ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 5 ) + FROM table_a_b; + + SELECT throwIf( count() != 9 ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {9 if args.deduplicate_dst_table else 18} ) + FROM table_when_b_even_and_joined; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {5 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {10 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even_and_joined; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 5} ) + FROM table_a_b; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 10} ) + FROM table_when_b_even_and_joined; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 10} ) + FROM table_a_b; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 20} ) + FROM table_when_b_even_and_joined; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + CREATE TABLE table_for_join_with + (a_join String, b UInt64) + ENGINE = MergeTree() + ORDER BY (a_join, b); + INSERT INTO table_for_join_with + SELECT 'joined_' || toString(number), number + FROM numbers(1); + {details_print_for_table_for_join_with} + + {create_table_a_b_statement} + SYSTEM STOP MERGES table_a_b; + + {create_table_when_b_even_and_joined_statement} + SYSTEM STOP MERGES table_when_b_even_and_joined; + + CREATE MATERIALIZED VIEW mv_b_even + TO table_when_b_even_and_joined + AS + SELECT a_src, a_join, table_for_join_with.b as b + FROM table_a_b + FULL OUTER JOIN table_for_join_with + ON table_a_b.b = table_for_join_with.b AND table_a_b.b % 2 = 0 + ORDER BY a_src, a_join, b; + + -- first insert + {insert_statement} + + {details_print_statements} + + -- first assertion + {assert_first_insert_statements} + + -- second insert + {insert_statement} + + {details_print_statements} + + -- second assertion + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def test_several_mv_into_one_table(parser): + ArgsFactory(parser).add_all() + + def calle(args): + tables = ["table_src", "table_dst", "mv_b_even", "mv_b_even_even"] + drop_tables_statements = get_drop_tables_statements(tables) + + create_table_src_statement = instance_create_statement( + table_name="table_src", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_src_table, + ) + + create_table_dst_statement = instance_create_statement( + table_name="table_dst", + table_columns="(a String, b UInt64)", + table_keys="(a, b)", + table_engine=args.table_engine, + with_deduplication=args.deduplicate_dst_table, + ) + + insert_statement = instance_insert_statement( + "table_src", + 8, + args.insert_method, + args.insert_unique_blocks, + args.use_insert_token, + ) + + details_print_statements = f""" + SELECT 'table_src count', count() FROM table_src; + + SELECT 'table_dst count', count() FROM table_dst; + {"" if not args.get_logs else "SELECT _part, count() FROM table_dst GROUP BY _part ORDER BY _part;"} + """ + + if args.insert_unique_blocks: + assert_first_insert_statements = f""" + SELECT throwIf( count() != 8 ) + FROM table_src; + + SELECT throwIf( count() != 6 ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {6 if args.deduplicate_dst_table else 12} ) + FROM table_dst; + """ + else: + if args.use_insert_token: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {8 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {16 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + else: + assert_first_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 8} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 16} ) + FROM table_dst; + """ + assert_second_insert_statements = f""" + SELECT throwIf( count() != {1 if args.deduplicate_src_table else 16} ) + FROM table_src; + + SELECT throwIf( count() != {2 if args.deduplicate_dst_table else 32} ) + FROM table_dst; + """ + + script = f""" + {get_logs_statement(args)} + + SET max_insert_threads={1 if args.single_thread else 10}; + SET update_insert_deduplication_token_in_dependent_materialized_views=1; + SET deduplicate_blocks_in_dependent_materialized_views=1; + + SET max_block_size=1; + SET min_insert_block_size_rows=0; + SET min_insert_block_size_bytes=0; + + {drop_tables_statements} + + {create_table_src_statement} + + {create_table_dst_statement} + + CREATE MATERIALIZED VIEW mv_b_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 2 = 0; + + CREATE MATERIALIZED VIEW mv_b_even_even + TO table_dst + AS + SELECT a, b + FROM table_src + WHERE b % 4 = 0; + + -- first insert + {insert_statement} + + {details_print_statements} + + {assert_first_insert_statements} + + -- second insert, retry + {insert_statement} + + {details_print_statements} + + {assert_second_insert_statements} + + {drop_tables_statements} + """ + + print(script) + + parser.set_defaults(func=calle) + + +def parse_args(): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(dest="test") + test_insert_several_blocks( + subparsers.add_parser("insert_several_blocks_into_table") + ) + test_mv_generates_several_blocks( + subparsers.add_parser("mv_generates_several_blocks") + ) + test_several_mv_into_one_table(subparsers.add_parser("several_mv_into_one_table")) + args = parser.parse_args() + if args.test is None: + parser.print_help() + return args + + +def main(): + args = parse_args() + if args.test is not None: + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference new file mode 100644 index 00000000000..4893274c1cd --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.reference @@ -0,0 +1,41 @@ +Different materialized view insert into one underlayed table equal data. +first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +second attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Different insert operations generate the same data after transformation in underlied table of materialized view. +first attempt +from dst 1 A all_1_1_0 +from mv_dst 0 A all_1_1_0 +second attempt +from dst 1 A all_1_1_0 +from dst 2 A all_2_2_0 +from mv_dst 0 A all_1_1_0 +from mv_dst 0 A all_2_2_0 +Indentical blocks in insertion with `insert_deduplication_token` +first attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +second attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +third attempt +from dst 0 A all_1_1_0 +from dst 0 A all_2_2_0 +Indentical blocks in insertion +from dst 0 A all_1_1_0 +Indentical blocks after materialised view`s transformation +first attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 +second attempt +from dst 1 B all_1_1_0 +from dst 2 B all_2_2_0 +from mv_dst 0 B all_1_1_0 +from mv_dst 0 B all_2_2_0 diff --git a/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql new file mode 100644 index 00000000000..7927a6b1edf --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_cases_from_docs.sql @@ -0,0 +1,331 @@ +-- ######### +select 'Different materialized view insert into one underlayed table equal data.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; +DROP TABLE IF EXISTS mv_first; +DROP TABLE IF EXISTS mv_second; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE TABLE mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_first +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +CREATE MATERIALIZED VIEW mv_second +TO mv_dst +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_second; +DROP TABLE mv_first; +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Different insert operations generate the same data after transformation in underlied table of materialized view.'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst VALUES (1, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst VALUES (2, 'A'); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion with `insert_deduplication_token`'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +select 'first attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +select 'third attempt'; + +INSERT INTO dst SELECT + 1 AS key, + 'b' AS value +FROM numbers(2) +SETTINGS insert_deduplication_token='some_user_token'; + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks in insertion'; + +DROP TABLE IF EXISTS dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +INSERT INTO dst SELECT + 0 AS key, + 'A' AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +DROP TABLE dst; + + +-- ######### +select 'Indentical blocks after materialised view`s transformation'; + +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS mv_dst; + +CREATE TABLE dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000; + +CREATE MATERIALIZED VIEW mv_dst +( + `key` Int64, + `value` String +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS non_replicated_deduplication_window=1000 +AS SELECT + 0 AS key, + value AS value +FROM dst; + +SET max_block_size=1; +SET min_insert_block_size_rows=0; +SET min_insert_block_size_bytes=0; + +SET deduplicate_blocks_in_dependent_materialized_views=1; + +select 'first attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +select 'second attempt'; + +INSERT INTO dst SELECT + number + 1 AS key, + IF(key = 0, 'A', 'B') AS value +FROM numbers(2); + +SELECT + 'from dst', + *, + _part +FROM dst +ORDER by all; + +SELECT + 'from mv_dst', + *, + _part +FROM mv_dst +ORDER by all; + +DROP TABLE mv_dst; +DROP TABLE dst; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference new file mode 100644 index 00000000000..c82a6eaa213 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.reference @@ -0,0 +1,35 @@ +no user deduplication token +partitioned_table is deduplicated bacause deduplication works in scope of one partiotion: +1 A +1 D +2 B +2 C +mv_table is not deduplicated because the inserted blocks was different: +1 A +1 A +1 D +2 B +2 B +2 C +with user deduplication token +partitioned_table is not deduplicated because different tokens: +1 A +1 A +1 D +2 B +2 B +2 C +mv_table is not deduplicated because different tokens: +1 A +1 A +1 D +2 B +2 B +2 C +with incorrect ussage of user deduplication token +partitioned_table is deduplicated because equal tokens: +1 A +2 B +mv_table is deduplicated because equal tokens: +1 A +2 B diff --git a/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql new file mode 100644 index 00000000000..2eb931f7f73 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_into_partitioned_table.sql @@ -0,0 +1,83 @@ +DROP TABLE IF EXISTS partitioned_table; +DROP TABLE IF EXISTS mv_table; + + +SET deduplicate_blocks_in_dependent_materialized_views = 1; + + +SELECT 'no user deduplication token'; + +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated bacause deduplication works in scope of one partiotion:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because the inserted blocks was different:'; +SELECT * FROM mv_table ORDER BY ALL; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; + + +SELECT 'with user deduplication token'; + +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_1' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_2' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_3' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is not deduplicated because different tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is not deduplicated because different tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; + + +SELECT 'with incorrect ussage of user deduplication token'; + +CREATE TABLE partitioned_table + (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table', '{replica}') + partition by key % 10 + order by tuple(); + +CREATE MATERIALIZED VIEW mv_table (key Int64, value String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/03008_deduplication_insert_into_partitioned_table_mv', '{replica}') + ORDER BY tuple() + AS SELECT key, value FROM partitioned_table; + +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'B'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'A'), (2, 'C'); +INSERT INTO partitioned_table SETTINGS insert_deduplication_token='token_0' VALUES (1, 'D'), (2, 'B'); + +SELECT 'partitioned_table is deduplicated because equal tokens:'; +SELECT * FROM partitioned_table ORDER BY ALL; +SELECT 'mv_table is deduplicated because equal tokens:'; +SELECT * FROM mv_table ORDER BY ALL; + +DROP TABLE partitioned_table; +DROP TABLE mv_table; diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..bf900aa84d2 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..49eb52b47fd --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference new file mode 100644 index 00000000000..c815324b455 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +table_a_b +count 1 +table_when_b_even +count 1 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 5 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 1 +0 +0 +table_a_b +count 20 +table_when_b_even +count 1 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 10 +table_when_b_even +count 5 +0 +0 +table_a_b +count 20 +table_when_b_even +count 10 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 10 +table_when_b_even +count 10 +0 +0 +table_a_b +count 20 +table_when_b_even +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh new file mode 100755 index 00000000000..53af06d4a6f --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 52: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python insert_several_blocks_into_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference new file mode 100644 index 00000000000..6e76ec46aa8 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh new file mode 100755 index 00000000000..7d4f5240cd1 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference new file mode 100644 index 00000000000..a25e8713c61 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.reference @@ -0,0 +1,962 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 10 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 5 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 1 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 1 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 9 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 2 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_a_b +count 5 +table_when_b_even_and_joined +count 9 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 18 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_a_b +count 5 +table_when_b_even_and_joined +count 10 +0 +0 +table_a_b +count 10 +table_when_b_even_and_joined +count 20 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh new file mode 100755 index 00000000000..109d1674f3a --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 20: engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python mv_generates_several_blocks \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference new file mode 100644 index 00000000000..b6a3e0175a7 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=MergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=MergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh new file mode 100755 index 00000000000..fe3d610a758 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="MergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=MergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference new file mode 100644 index 00000000000..1921103f49e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.reference @@ -0,0 +1,706 @@ + +Test case 0: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 1: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 2: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 3: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 4: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 5: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 6: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 7: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 8: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 9: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 10: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 11: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 12: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 13: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 14: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 15: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 16: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 18: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 19: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 20: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 21: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 22: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 23: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 24: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 25: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 26: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 27: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 28: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 29: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 30: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 31: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 32: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 33: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 34: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 35: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 36: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 37: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 38: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 39: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 40: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 41: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 16 +0 +0 +OK + +Test case 42: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 43: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 8 +table_dst count 32 +0 +0 +OK + +Test case 44: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 45: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 16 +0 +0 +OK + +Test case 46: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 47: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=True single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 48: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 49: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 50: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 51: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 52: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 53: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 54: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 55: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +Test case 56: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 6 +0 +0 +OK + +Test case 57: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False +table_src count 1 +table_dst count 2 +0 +0 +table_src count 1 +table_dst count 2 +0 +0 +OK + +Test case 58: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 8 +table_dst count 12 +0 +0 +OK + +Test case 59: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=True deduplicate_dst_table=False insert_unique_blocks=False +table_src count 1 +table_dst count 16 +0 +0 +table_src count 1 +table_dst count 32 +0 +0 +OK + +Test case 60: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 6 +0 +0 +OK + +Test case 61: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=True insert_unique_blocks=False +table_src count 8 +table_dst count 2 +0 +0 +table_src count 16 +table_dst count 2 +0 +0 +OK + +Test case 62: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=True +table_src count 8 +table_dst count 6 +0 +0 +table_src count 16 +table_dst count 12 +0 +0 +OK + +Test case 63: insert_method=InsertValues engine=ReplicatedMergeTree use_insert_token=False single_thread=False deduplicate_src_table=False deduplicate_dst_table=False insert_unique_blocks=False +table_src count 8 +table_dst count 16 +0 +0 +table_src count 16 +table_dst count 32 +0 +0 +OK + +All cases executed diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh new file mode 100755 index 00000000000..9adee6d53d4 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Tags: long, no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +ENGINE="ReplicatedMergeTree" + +RUN_ONLY="" +#RUN_ONLY="Test case 17: insert_method=InsertSelect engine=ReplicatedMergeTree use_insert_token=False single_thread=True deduplicate_src_table=True deduplicate_dst_table=True insert_unique_blocks=False" + +i=0 +for insert_method in "InsertSelect" "InsertValues"; do + for use_insert_token in "True" "False"; do + for single_thread in "True" "False"; do + for deduplicate_src_table in "True" "False"; do + for deduplicate_dst_table in "True" "False"; do + for insert_unique_blocks in "True" "False"; do + + THIS_RUN="Test case $i:" + THIS_RUN+=" insert_method=$insert_method" + THIS_RUN+=" engine=$ENGINE" + THIS_RUN+=" use_insert_token=$use_insert_token" + THIS_RUN+=" single_thread=$single_thread" + THIS_RUN+=" deduplicate_src_table=$deduplicate_src_table" + THIS_RUN+=" deduplicate_dst_table=$deduplicate_dst_table" + THIS_RUN+=" insert_unique_blocks=$insert_unique_blocks" + + i=$((i+1)) + + echo + if [ -n "$RUN_ONLY" ] && [ "$RUN_ONLY" != "$THIS_RUN" ]; then + echo "skip $THIS_RUN" + continue + fi + echo "$THIS_RUN" + + $CLICKHOUSE_CLIENT --max_insert_block_size 1 -nmq " + $(python3 $CURDIR/03008_deduplication.python several_mv_into_one_table \ + --insert-method $insert_method \ + --table-engine $ENGINE \ + --use-insert-token $use_insert_token \ + --single-thread $single_thread \ + --deduplicate-src-table $deduplicate_src_table \ + --deduplicate-dst-table $deduplicate_dst_table \ + --insert-unique-blocks $insert_unique_blocks \ + --get-logs false \ + ) + " && echo OK || echo FAIL + done + done + done + done + done +done + +echo +echo "All cases executed" diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh index 1e6bfb414d8..cedb651a430 100755 --- a/tests/queries/0_stateless/03035_max_insert_threads_support.sh +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -8,7 +8,7 @@ DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" $CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC -" | grep -o MaterializingTransform | wc -l +" | grep -o StorageFileSink | wc -l DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") rm $DATA_FILE_PATH From 376472c8ceffe82a466067c9fc0039517726da62 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 5 Jul 2024 19:12:23 +0200 Subject: [PATCH 22/64] add test when chunk with 0 columns has to be produced in squashing --- src/Core/Settings.h | 2 +- src/Interpreters/Squashing.cpp | 36 ++++++++++++------- src/Interpreters/Squashing.h | 10 +++--- .../DeduplicationTokenTransforms.cpp | 4 ++- .../0_stateless/01275_parallel_mv.sql.j2 | 6 ++-- .../03008_deduplication_wrong_mv.reference | 15 ++++++++ .../03008_deduplication_wrong_mv.sql | 21 +++++++++++ 7 files changed, 71 insertions(+), 23 deletions(-) create mode 100644 tests/queries/0_stateless/03008_deduplication_wrong_mv.reference create mode 100644 tests/queries/0_stateless/03008_deduplication_wrong_mv.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 81d0aa0c51d..adf437100b1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -952,7 +952,7 @@ class IColumn; #define OBSOLETE_SETTINGS(M, ALIAS) \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ - MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 1) \ + MAKE_OBSOLETE(M, Bool, update_insert_deduplication_token_in_dependent_materialized_views, 0) \ MAKE_OBSOLETE(M, UInt64, max_memory_usage_for_all_queries, 0) \ MAKE_OBSOLETE(M, UInt64, multiple_joins_rewriter_version, 0) \ MAKE_OBSOLETE(M, Bool, enable_debug_queries, false) \ diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 25434d1103e..a076494a4bc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,5 +1,7 @@ #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include @@ -16,6 +18,7 @@ Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_bloc , min_block_size_bytes(min_block_size_bytes_) , header(header_) { + LOG_TEST(getLogger("Squashing"), "header columns {}", header.columns()); } Chunk Squashing::flush() @@ -23,7 +26,7 @@ Chunk Squashing::flush() if (!accumulated) return {}; - auto result = convertToChunk(accumulated.extract()); + auto result = convertToChunk(extract()); chassert(result); return result; } @@ -43,6 +46,8 @@ Chunk Squashing::squash(Chunk && input_chunk) Chunk Squashing::add(Chunk && input_chunk) { + LOG_TEST(getLogger("Squashing"), "add columns {} rows {}", input_chunk.getNumColumns(), input_chunk.getNumRows()); + if (!input_chunk) return {}; @@ -53,11 +58,11 @@ Chunk Squashing::add(Chunk && input_chunk) if (!accumulated) { accumulated.add(std::move(input_chunk)); - return convertToChunk(accumulated.extract()); + return convertToChunk(extract()); } /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); + Chunk res_chunk = convertToChunk(extract()); accumulated.add(std::move(input_chunk)); return res_chunk; } @@ -66,7 +71,7 @@ Chunk Squashing::add(Chunk && input_chunk) if (isEnoughSize()) { /// Return accumulated data and place new block to accumulated data. - Chunk res_chunk = convertToChunk(accumulated.extract()); + Chunk res_chunk = convertToChunk(extract()); accumulated.add(std::move(input_chunk)); return res_chunk; } @@ -76,21 +81,25 @@ Chunk Squashing::add(Chunk && input_chunk) /// If accumulated data is big enough, we send it if (isEnoughSize()) - return convertToChunk(accumulated.extract()); + return convertToChunk(extract()); return {}; } -Chunk Squashing::convertToChunk(std::vector && chunks) const +Chunk Squashing::convertToChunk(CurrentData && data) const { - if (chunks.empty()) + LOG_TEST(getLogger("Squashing"), "convertToChunk {}", data.chunks.size()); + + if (data.chunks.empty()) return {}; auto info = std::make_shared(); - info->chunks = std::move(chunks); + info->chunks = std::move(data.chunks); // It is imortant that chunk is not empty, it has to have columns even if they are empty - auto aggr_chunk = Chunk(header.getColumns(), 0); + // Sometimes there are could be no columns in header but not empty rows in chunks + // That happens when we intend to add defaults for the missing columns after + auto aggr_chunk = Chunk(header.getColumns(), header.columns() ? 0 : data.getRows()); aggr_chunk.getChunkInfos().add(std::move(info)); chassert(aggr_chunk); return aggr_chunk; @@ -149,17 +158,18 @@ bool Squashing::isEnoughSize(const Chunk & chunk) const return isEnoughSize(chunk.getNumRows(), chunk.bytes()); } -void Squashing::CurrentSize::add(Chunk && chunk) +void Squashing::CurrentData::add(Chunk && chunk) { rows += chunk.getNumRows(); bytes += chunk.bytes(); chunks.push_back(std::move(chunk)); } -std::vector Squashing::CurrentSize::extract() +Squashing::CurrentData Squashing::extract() { - auto result = std::move(chunks); - *this = {}; + auto result = std::move(accumulated); + accumulated = {}; return result; } + } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 64a9768a71f..71ed4c4185a 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -49,25 +49,23 @@ public: const Block & getHeader() const { return header; } private: - class CurrentSize + struct CurrentData { std::vector chunks = {}; size_t rows = 0; size_t bytes = 0; - public: explicit operator bool () const { return !chunks.empty(); } size_t getRows() const { return rows; } size_t getBytes() const { return bytes; } void add(Chunk && chunk); - std::vector extract(); }; const size_t min_block_size_rows; const size_t min_block_size_bytes; Block header; - CurrentSize accumulated; + CurrentData accumulated; static Chunk squash(std::vector && input_chunks, Chunk::ChunkInfoCollection && infos); @@ -75,7 +73,9 @@ private: bool isEnoughSize(size_t rows, size_t bytes) const; bool isEnoughSize(const Chunk & chunk) const; - Chunk convertToChunk(std::vector && chunks) const; + CurrentData extract(); + + Chunk convertToChunk(CurrentData && data) const; }; } diff --git a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp index 6786f76cbef..e6f7e44e026 100644 --- a/src/Processors/Transforms/DeduplicationTokenTransforms.cpp +++ b/src/Processors/Transforms/DeduplicationTokenTransforms.cpp @@ -148,9 +148,11 @@ void CheckTokenTransform::transform(Chunk & chunk) auto token_info = chunk.getChunkInfos().get(); if (!token_info) + { throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk has to have DedupTokenInfo as ChunkInfo, {}", debug); + } - LOG_DEBUG(log, "debug: {}, token: {}", debug, token_info->debugToken()); + LOG_TEST(log, "debug: {}, token: {}, columns {} rows {}", debug, token_info->debugToken(), chunk.getNumColumns(), chunk.getNumRows()); } #endif diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 9d74474c1a4..bc663bd0e63 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -12,9 +12,9 @@ drop table if exists testXC; create table testX (A Int64) engine=MergeTree order by tuple(); -create materialized view testXA engine=MergeTree order by tuple() as select sleep(0.1) from testX; -create materialized view testXB engine=MergeTree order by tuple() as select sleep(0.2), throwIf(A=1) from testX; -create materialized view testXC engine=MergeTree order by tuple() as select sleep(0.1) from testX; +create materialized view testXA engine=MergeTree order by tuple() as select sleepEachRow(0.2) from testX; +create materialized view testXB engine=MergeTree order by tuple() as select sleepEachRow(0.4), throwIf(A=1) from testX; +create materialized view testXC engine=MergeTree order by tuple() as select sleepEachRow(0.2) from testX; -- { echoOn } {% for parallel_view_processing in [0, 1] %} diff --git a/tests/queries/0_stateless/03008_deduplication_wrong_mv.reference b/tests/queries/0_stateless/03008_deduplication_wrong_mv.reference new file mode 100644 index 00000000000..89b4f324d5e --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_wrong_mv.reference @@ -0,0 +1,15 @@ +-- { echo ON } +CREATE TABLE src (x UInt8) ENGINE = Memory; +CREATE TABLE dst (x UInt8) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv1 TO dst AS SELECT * FROM src; +INSERT INTO src VALUES (0); +SELECT * from dst; +0 +TRUNCATE TABLE dst; +--DROP TABLE src SYNC; +--CREATE TABLE src (y String) ENGINE = MergeTree order by tuple(); +ALTER TABLE src ADD COLUMN y UInt8; +ALTER TABLE src DROP COLUMN x; +INSERT INTO src VALUES (0); +SELECT * from dst; +0 diff --git a/tests/queries/0_stateless/03008_deduplication_wrong_mv.sql b/tests/queries/0_stateless/03008_deduplication_wrong_mv.sql new file mode 100644 index 00000000000..dd74c38ab17 --- /dev/null +++ b/tests/queries/0_stateless/03008_deduplication_wrong_mv.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS mv; +DROP TABLE IF EXISTS src; +DROP TABLE IF EXISTS dst; + +-- { echo ON } +CREATE TABLE src (x UInt8) ENGINE = Memory; +CREATE TABLE dst (x UInt8) ENGINE = Memory; +CREATE MATERIALIZED VIEW mv1 TO dst AS SELECT * FROM src; + +INSERT INTO src VALUES (0); +SELECT * from dst; + +TRUNCATE TABLE dst; + +--DROP TABLE src SYNC; +--CREATE TABLE src (y String) ENGINE = MergeTree order by tuple(); +ALTER TABLE src ADD COLUMN y UInt8; +ALTER TABLE src DROP COLUMN x; + +INSERT INTO src VALUES (0); +SELECT * from dst; From 8035582169ae0666a3e024d9329d98e4c3a9b7a9 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 14:57:40 +0200 Subject: [PATCH 23/64] adjust test 01275_parallel_mv --- src/Interpreters/InterpreterInsertQuery.cpp | 24 +++++++------------ .../0_stateless/01275_parallel_mv.sql.j2 | 19 +++++++++------ 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 2cbfc55d008..636db546a79 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -600,16 +601,15 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & /// Otherwise ResizeProcessor them down to 1 stream. size_t presink_streams_size = std::max(settings.max_insert_threads, pipeline.getNumStreams()); + if (settings.max_insert_threads.changed) + presink_streams_size = std::max(1, settings.max_insert_threads); size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - if (!settings.parallel_view_processing) + size_t views_involved = table->isView() || DatabaseCatalog::instance().getDependentViews(table->getStorageID()).size() > 0; + if (!settings.parallel_view_processing && views_involved) { - auto table_id = table->getStorageID(); - auto views = DatabaseCatalog::instance().getDependentViews(table_id); - - if (table->isView() || !views.empty()) - sink_streams_size = 1; + sink_streams_size = 1; } auto [presink_chains, sink_chains] = buildPreAndSinkChains( @@ -639,20 +639,12 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & pipeline.addResources(chain.detachResources()); pipeline.addChains(std::move(sink_chains)); - if (!settings.parallel_view_processing) + if (!settings.parallel_view_processing && views_involved) { /// Don't use more threads for INSERT than for SELECT to reduce memory consumption. if (pipeline.getNumThreads() > num_select_threads) pipeline.setMaxThreads(num_select_threads); } - else if (pipeline.getNumThreads() < settings.max_threads) - { - /// It is possible for query to have max_threads=1, due to optimize_trivial_insert_select, - /// however in case of parallel_view_processing and multiple views, views can still be processed in parallel. - /// - /// Note, number of threads will be limited by buildPushingToViewsChain() to max_threads. - pipeline.setMaxThreads(settings.max_threads); - } pipeline.setSinks([&](const Block & cur_header, QueryPipelineBuilder::StreamType) -> ProcessorPtr { @@ -794,6 +786,8 @@ BlockIO InterpreterInsertQuery::execute() if (const auto * mv = dynamic_cast(table.get())) res.pipeline.addStorageHolder(mv->getTargetTable()); + LOG_TEST(getLogger("InterpreterInsertQuery"), "Pipeline could use up to {} thread", res.pipeline.getNumThreads()); + return res; } diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index bc663bd0e63..4e45c68b5ad 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -3,7 +3,12 @@ -- no-parallel: it checks the number of threads, which can be lowered in presence of other queries -- avoid settings randomization by clickhouse-test -set max_threads = 0; +set max_threads = 10; + + +-- more blocks to process +set max_block_size = 10; +set min_insert_block_size_rows = 10; drop table if exists testX; drop table if exists testXA; @@ -12,23 +17,23 @@ drop table if exists testXC; create table testX (A Int64) engine=MergeTree order by tuple(); -create materialized view testXA engine=MergeTree order by tuple() as select sleepEachRow(0.2) from testX; -create materialized view testXB engine=MergeTree order by tuple() as select sleepEachRow(0.4), throwIf(A=1) from testX; -create materialized view testXC engine=MergeTree order by tuple() as select sleepEachRow(0.2) from testX; +create materialized view testXA engine=MergeTree order by tuple() as select sleep(0.1) from testX; +create materialized view testXB engine=MergeTree order by tuple() as select sleep(0.2), throwIf(A=1) from testX; +create materialized view testXC engine=MergeTree order by tuple() as select sleep(0.1) from testX; -- { echoOn } {% for parallel_view_processing in [0, 1] %} {% for optimize_trivial_insert_select in [0, 1] %} -{% for max_insert_threads in [0, 16] %} +{% for max_insert_threads in [0, 5] %} select 'optimize_trivial_insert_select={{ optimize_trivial_insert_select }}', 'max_insert_threads={{ max_insert_threads }}'; -insert into testX select number from numbers(10) settings +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing={{ parallel_view_processing }}, optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and From b4e9e410e30a02799f446e635f4c9d0f0da76913 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 15:00:23 +0200 Subject: [PATCH 24/64] fix peak_threads_usage --- src/Interpreters/ThreadStatusExt.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 9ca521a4ab3..6ec6a64b13d 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -233,7 +233,8 @@ void ThreadStatus::attachToGroupImpl(const ThreadGroupPtr & thread_group_) { /// Attach or init current thread to thread group and copy useful information from it thread_group = thread_group_; - thread_group->linkThread(thread_id); + if (!internal_thread) + thread_group->linkThread(thread_id); performance_counters.setParent(&thread_group->performance_counters); memory_tracker.setParent(&thread_group->memory_tracker); @@ -269,7 +270,8 @@ void ThreadStatus::detachFromGroup() /// Extract MemoryTracker out from query and user context memory_tracker.setParent(&total_memory_tracker); - thread_group->unlinkThread(); + if (!internal_thread) + thread_group->unlinkThread(); thread_group.reset(); From 898260c45cfb03d715f3e5a38c4298c399a9cfd0 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 8 Jul 2024 21:16:05 +0200 Subject: [PATCH 25/64] fix style --- src/Processors/Transforms/SquashingTransform.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 605c6afcd00..490a57d4e23 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -8,8 +8,7 @@ namespace DB namespace ErrorCodes { -extern const int LOGICAL_ERROR; -extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } SquashingTransform::SquashingTransform( From 751ee04e75a75629ba9b939d02070d2444afcab7 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 9 Jul 2024 13:10:04 +0200 Subject: [PATCH 26/64] fix tidy build, canonize 01275_parallel_mv.reference --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- .../0_stateless/01275_parallel_mv.reference | 124 +++++++++--------- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 636db546a79..6fd53070f41 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -606,7 +606,7 @@ QueryPipeline InterpreterInsertQuery::buildInsertSelectPipeline(ASTInsertQuery & size_t sink_streams_size = table->supportsParallelInsert() ? std::max(1, settings.max_insert_threads) : 1; - size_t views_involved = table->isView() || DatabaseCatalog::instance().getDependentViews(table->getStorageID()).size() > 0; + size_t views_involved = table->isView() || !DatabaseCatalog::instance().getDependentViews(table->getStorageID()).empty(); if (!settings.parallel_view_processing && views_involved) { sink_streams_size = 1; diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index dadf2f35e6e..221d0b5ce8d 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -4,13 +4,13 @@ select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; optimize_trivial_insert_select=0 max_insert_threads=0 -insert into testX select number from numbers(10) settings +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -19,46 +19,46 @@ select arrayUniq(thread_ids) from system.query_log where Settings['max_insert_threads'] = '0'; 2 select count() from testX; -10 +200 select count() from testXA; -10 +200 select count() from testXB; 0 select count() from testXC; -10 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=16'; -optimize_trivial_insert_select=0 max_insert_threads=16 -insert into testX select number from numbers(10) settings +200 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=5'; +optimize_trivial_insert_select=0 max_insert_threads=5 +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=0, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0' and Settings['optimize_trivial_insert_select'] = '0' and - Settings['max_insert_threads'] = '16'; + Settings['max_insert_threads'] = '5'; 2 select count() from testX; -20 +400 select count() from testXA; -20 +400 select count() from testXB; 0 select count() from testXC; -20 +400 select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; optimize_trivial_insert_select=1 max_insert_threads=0 -insert into testX select number from numbers(10) settings +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -67,46 +67,46 @@ select arrayUniq(thread_ids) from system.query_log where Settings['max_insert_threads'] = '0'; 2 select count() from testX; -30 +600 select count() from testXA; -30 +600 select count() from testXB; 0 select count() from testXC; -30 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=16'; -optimize_trivial_insert_select=1 max_insert_threads=16 -insert into testX select number from numbers(10) settings +600 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=5'; +optimize_trivial_insert_select=1 max_insert_threads=5 +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=1, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0' and Settings['optimize_trivial_insert_select'] = '1' and - Settings['max_insert_threads'] = '16'; + Settings['max_insert_threads'] = '5'; 2 select count() from testX; -40 +800 select count() from testXA; -40 +800 select count() from testXB; 0 select count() from testXC; -40 +800 select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; optimize_trivial_insert_select=0 max_insert_threads=0 -insert into testX select number from numbers(10) settings +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=0, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and @@ -115,82 +115,82 @@ select arrayUniq(thread_ids) from system.query_log where Settings['max_insert_threads'] = '0'; 5 select count() from testX; -50 +1000 select count() from testXA; -50 +1000 select count() from testXB; 0 select count() from testXC; -50 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=16'; -optimize_trivial_insert_select=0 max_insert_threads=16 -insert into testX select number from numbers(10) settings +1000 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=5'; +optimize_trivial_insert_select=0 max_insert_threads=5 +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=0, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '0' and - Settings['max_insert_threads'] = '16'; -18 + Settings['max_insert_threads'] = '5'; +12 select count() from testX; -60 +1190 select count() from testXA; -60 +1130 select count() from testXB; -0 -select count() from testXC; 60 +select count() from testXC; +1130 select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; optimize_trivial_insert_select=1 max_insert_threads=0 -insert into testX select number from numbers(10) settings +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=1, max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and Settings['max_insert_threads'] = '0'; -5 +2 select count() from testX; -70 +1390 select count() from testXA; -70 +1330 select count() from testXB; -0 +60 select count() from testXC; -70 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=16'; -optimize_trivial_insert_select=1 max_insert_threads=16 -insert into testX select number from numbers(10) settings +1330 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=5'; +optimize_trivial_insert_select=1 max_insert_threads=5 +insert into testX select number from numbers(200) settings log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=1, - max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select arrayUniq(thread_ids) from system.query_log where +select peak_threads_usage from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1' and Settings['optimize_trivial_insert_select'] = '1' and - Settings['max_insert_threads'] = '16'; -18 + Settings['max_insert_threads'] = '5'; +7 select count() from testX; -80 +1590 select count() from testXA; -80 +1480 select count() from testXB; -0 +160 select count() from testXC; -80 +1490 From 5daa28d5c5726478cbd635ce0ca6b8ce77f5fdce Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 9 Jul 2024 13:27:58 +0200 Subject: [PATCH 27/64] mark heavy tests with no-asan tag, they are slow with asan and asuze --- .../03008_deduplication_insert_several_blocks_nonreplicated.sh | 2 +- .../03008_deduplication_insert_several_blocks_replicated.sh | 2 +- ...8_deduplication_mv_generates_several_blocks_nonreplicated.sh | 2 +- ...3008_deduplication_mv_generates_several_blocks_replicated.sh | 2 +- ...008_deduplication_several_mv_into_one_table_nonreplicated.sh | 2 +- .../03008_deduplication_several_mv_into_one_table_replicated.sh | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh index 49eb52b47fd..0791c7566f9 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_nonreplicated.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel +# Tags: long, no-fasttest, no-parallel, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh index 53af06d4a6f..661dfa5f930 100755 --- a/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_insert_several_blocks_replicated.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel +# Tags: long, no-fasttest, no-parallel, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh index 7d4f5240cd1..10083506af4 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_nonreplicated.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel +# Tags: long, no-fasttest, no-parallel, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh index 109d1674f3a..0d5158d18cd 100755 --- a/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_mv_generates_several_blocks_replicated.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel +# Tags: long, no-fasttest, no-parallel, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh index fe3d610a758..a9a135d6839 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_nonreplicated.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel +# Tags: long, no-fasttest, no-parallel, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh index 9adee6d53d4..49d556e70f9 100755 --- a/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh +++ b/tests/queries/0_stateless/03008_deduplication_several_mv_into_one_table_replicated.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-fasttest, no-parallel +# Tags: long, no-fasttest, no-parallel, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 678c472236f8479455cdfcd697ed2a822f68a6b4 Mon Sep 17 00:00:00 2001 From: Aleksei Filatov Date: Wed, 10 Jul 2024 14:42:13 +0300 Subject: [PATCH 28/64] Close log files in watchdog --- src/Daemon/BaseDaemon.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 48f76769a09..f82f02c9d9d 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -1305,6 +1305,10 @@ void BaseDaemon::setupWatchdog() int status = 0; do { + // Close log files to prevent keeping descriptors of unlinked rotated files. + // On next log write files will be reopened. + closeLogs(logger()); + if (-1 != waitpid(pid, &status, WUNTRACED | WCONTINUED) || errno == ECHILD) { if (WIFSTOPPED(status)) From e8d831ce905d9ea6acbdb4e20142191c98431638 Mon Sep 17 00:00:00 2001 From: joelynch Date: Mon, 17 Jun 2024 19:06:23 +0200 Subject: [PATCH 29/64] Fix more dictGet ACL bypasses. --- src/Storages/StorageDictionary.cpp | 2 ++ .../02916_dictionary_access.reference | 2 ++ .../0_stateless/02916_dictionary_access.sh | 16 +++++++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 447fd87cdc9..a64acaebff6 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -160,6 +161,7 @@ Pipe StorageDictionary::read( const size_t max_block_size, const size_t threads) { + local_context->checkAccess(AccessType::dictGet, getStorageID()); auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? getStorageID().getInternalDictionaryName() : dictionary_name; auto dictionary = getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, local_context); return dictionary->read(column_names, max_block_size, threads); diff --git a/tests/queries/0_stateless/02916_dictionary_access.reference b/tests/queries/0_stateless/02916_dictionary_access.reference index d7d02e37689..ab44e74249e 100644 --- a/tests/queries/0_stateless/02916_dictionary_access.reference +++ b/tests/queries/0_stateless/02916_dictionary_access.reference @@ -1,3 +1,5 @@ 0 ACCESS_DENIED ACCESS_DENIED +ACCESS_DENIED +ACCESS_DENIED diff --git a/tests/queries/0_stateless/02916_dictionary_access.sh b/tests/queries/0_stateless/02916_dictionary_access.sh index 08ee517ab3b..5f80de120f4 100755 --- a/tests/queries/0_stateless/02916_dictionary_access.sh +++ b/tests/queries/0_stateless/02916_dictionary_access.sh @@ -6,6 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" +dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" ${CLICKHOUSE_CLIENT} -nm --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} @@ -18,9 +19,13 @@ ${CLICKHOUSE_CLIENT} -nm --query " LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); CREATE USER IF NOT EXISTS ${username} NOT IDENTIFIED; - GRANT CREATE TEMPORARY TABLE ON *.* to ${username}; + GRANT SELECT, CREATE TEMPORARY TABLE ON *.* to ${username}; SELECT * FROM dictionary(${dictname}); + SELECT * FROM ${dictname}; SELECT dictGet(${dictname}, 'value', 1); + CREATE TABLE ${dicttablename} (id UInt64, value UInt64) + ENGINE = Dictionary(${CLICKHOUSE_DATABASE}.${dictname}); + SELECT * FROM ${dicttablename}; " $CLICKHOUSE_CLIENT -nm --user="${username}" --query " @@ -31,7 +36,16 @@ $CLICKHOUSE_CLIENT -nm --user="${username}" --query " SELECT dictGet(${dictname}, 'value', 1); " 2>&1 | grep -o ACCESS_DENIED | uniq +$CLICKHOUSE_CLIENT -nm --user="${username}" --query " + SELECT * FROM ${dictname}; +" 2>&1 | grep -o ACCESS_DENIED | uniq + +$CLICKHOUSE_CLIENT -nm --user="${username}" --query " + SELECT * FROM ${dicttablename}; +" 2>&1 | grep -o ACCESS_DENIED | uniq + ${CLICKHOUSE_CLIENT} -nm --query " + DROP TABLE IF EXISTS ${dicttablename} SYNC; DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; " From cd9fd1880f672fe0c8b072bf63a73e4d85ecfda4 Mon Sep 17 00:00:00 2001 From: joelynch Date: Wed, 19 Jun 2024 16:44:47 +0200 Subject: [PATCH 30/64] Check for dict access on underlying dict rather than table This is better for the table function because otherwise the database is set to "_table_function" when checking access which is not the database where the actual dict is. --- src/Storages/StorageDictionary.cpp | 2 +- src/TableFunctions/TableFunctionDictionary.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index a64acaebff6..72f873d1a0e 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -161,9 +161,9 @@ Pipe StorageDictionary::read( const size_t max_block_size, const size_t threads) { - local_context->checkAccess(AccessType::dictGet, getStorageID()); auto registered_dictionary_name = location == Location::SameDatabaseAndNameAsDictionary ? getStorageID().getInternalDictionaryName() : dictionary_name; auto dictionary = getContext()->getExternalDictionariesLoader().getDictionary(registered_dictionary_name, local_context); + local_context->checkAccess(AccessType::dictGet, dictionary->getDatabaseOrNoDatabaseTag(), dictionary->getDictionaryID().getTableName()); return dictionary->read(column_names, max_block_size, threads); } diff --git a/src/TableFunctions/TableFunctionDictionary.cpp b/src/TableFunctions/TableFunctionDictionary.cpp index 867fbf5b11e..d58c01385e0 100644 --- a/src/TableFunctions/TableFunctionDictionary.cpp +++ b/src/TableFunctions/TableFunctionDictionary.cpp @@ -80,7 +80,6 @@ ColumnsDescription TableFunctionDictionary::getActualTableStructure(ContextPtr c StoragePtr TableFunctionDictionary::executeImpl( const ASTPtr &, ContextPtr context, const std::string & table_name, ColumnsDescription, bool is_insert_query) const { - context->checkAccess(AccessType::dictGet, getDatabaseName(), table_name); StorageID dict_id(getDatabaseName(), table_name); auto dictionary_table_structure = getActualTableStructure(context, is_insert_query); From 29dac3c97ad69736f876073ab6023d9e6c7c564a Mon Sep 17 00:00:00 2001 From: joelynch Date: Tue, 2 Jul 2024 20:47:02 +0200 Subject: [PATCH 31/64] move tests to new file --- .../02916_dictionary_access.reference | 2 - .../0_stateless/02916_dictionary_access.sh | 16 +------- .../03199_dictionary_table_access.reference | 2 + .../03199_dictionary_table_access.sh | 41 +++++++++++++++++++ 4 files changed, 44 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/03199_dictionary_table_access.reference create mode 100755 tests/queries/0_stateless/03199_dictionary_table_access.sh diff --git a/tests/queries/0_stateless/02916_dictionary_access.reference b/tests/queries/0_stateless/02916_dictionary_access.reference index ab44e74249e..d7d02e37689 100644 --- a/tests/queries/0_stateless/02916_dictionary_access.reference +++ b/tests/queries/0_stateless/02916_dictionary_access.reference @@ -1,5 +1,3 @@ 0 ACCESS_DENIED ACCESS_DENIED -ACCESS_DENIED -ACCESS_DENIED diff --git a/tests/queries/0_stateless/02916_dictionary_access.sh b/tests/queries/0_stateless/02916_dictionary_access.sh index 5f80de120f4..08ee517ab3b 100755 --- a/tests/queries/0_stateless/02916_dictionary_access.sh +++ b/tests/queries/0_stateless/02916_dictionary_access.sh @@ -6,7 +6,6 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" -dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" ${CLICKHOUSE_CLIENT} -nm --query " CREATE DICTIONARY IF NOT EXISTS ${dictname} @@ -19,13 +18,9 @@ ${CLICKHOUSE_CLIENT} -nm --query " LAYOUT(FLAT()) LIFETIME(MIN 0 MAX 1000); CREATE USER IF NOT EXISTS ${username} NOT IDENTIFIED; - GRANT SELECT, CREATE TEMPORARY TABLE ON *.* to ${username}; + GRANT CREATE TEMPORARY TABLE ON *.* to ${username}; SELECT * FROM dictionary(${dictname}); - SELECT * FROM ${dictname}; SELECT dictGet(${dictname}, 'value', 1); - CREATE TABLE ${dicttablename} (id UInt64, value UInt64) - ENGINE = Dictionary(${CLICKHOUSE_DATABASE}.${dictname}); - SELECT * FROM ${dicttablename}; " $CLICKHOUSE_CLIENT -nm --user="${username}" --query " @@ -36,16 +31,7 @@ $CLICKHOUSE_CLIENT -nm --user="${username}" --query " SELECT dictGet(${dictname}, 'value', 1); " 2>&1 | grep -o ACCESS_DENIED | uniq -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " - SELECT * FROM ${dictname}; -" 2>&1 | grep -o ACCESS_DENIED | uniq - -$CLICKHOUSE_CLIENT -nm --user="${username}" --query " - SELECT * FROM ${dicttablename}; -" 2>&1 | grep -o ACCESS_DENIED | uniq - ${CLICKHOUSE_CLIENT} -nm --query " - DROP TABLE IF EXISTS ${dicttablename} SYNC; DROP DICTIONARY IF EXISTS ${dictname}; DROP USER IF EXISTS ${username}; " diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.reference b/tests/queries/0_stateless/03199_dictionary_table_access.reference new file mode 100644 index 00000000000..4a703b3be84 --- /dev/null +++ b/tests/queries/0_stateless/03199_dictionary_table_access.reference @@ -0,0 +1,2 @@ +ACCESS_DENIED +ACCESS_DENIED diff --git a/tests/queries/0_stateless/03199_dictionary_table_access.sh b/tests/queries/0_stateless/03199_dictionary_table_access.sh new file mode 100755 index 00000000000..952b466b5da --- /dev/null +++ b/tests/queries/0_stateless/03199_dictionary_table_access.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +username="user_${CLICKHOUSE_TEST_UNIQUE_NAME}" +dictname="dict_${CLICKHOUSE_TEST_UNIQUE_NAME}" +dicttablename="dict_table_${CLICKHOUSE_TEST_UNIQUE_NAME}" + +${CLICKHOUSE_CLIENT} -nm --query " + CREATE DICTIONARY IF NOT EXISTS ${dictname} + ( + id UInt64, + value UInt64 + ) + PRIMARY KEY id + SOURCE(NULL()) + LAYOUT(FLAT()) + LIFETIME(MIN 0 MAX 1000); + CREATE USER IF NOT EXISTS ${username} NOT IDENTIFIED; + GRANT SELECT, CREATE TEMPORARY TABLE ON *.* to ${username}; + SELECT * FROM ${dictname}; + CREATE TABLE ${dicttablename} (id UInt64, value UInt64) + ENGINE = Dictionary(${CLICKHOUSE_DATABASE}.${dictname}); + SELECT * FROM ${dicttablename}; +" + +$CLICKHOUSE_CLIENT -nm --user="${username}" --query " + SELECT * FROM ${dictname}; +" 2>&1 | grep -o ACCESS_DENIED | uniq + +$CLICKHOUSE_CLIENT -nm --user="${username}" --query " + SELECT * FROM ${dicttablename}; +" 2>&1 | grep -o ACCESS_DENIED | uniq + +${CLICKHOUSE_CLIENT} -nm --query " + DROP TABLE IF EXISTS ${dicttablename} SYNC; + DROP DICTIONARY IF EXISTS ${dictname}; + DROP USER IF EXISTS ${username}; +" From ef4f34545e46a4e0298bd4bca28597ae642f7b50 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 11 Jul 2024 18:00:32 +0200 Subject: [PATCH 32/64] make 01275_parallel_mv.sql.j2 stable --- .../Transforms/buildPushingToViewsChain.cpp | 4 +- .../0_stateless/01275_parallel_mv.reference | 101 +++++++++--------- .../0_stateless/01275_parallel_mv.sql.j2 | 16 ++- tests/result | 12 +++ 4 files changed, 78 insertions(+), 55 deletions(-) create mode 100644 tests/result diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 312b333ab33..3df726aa0e8 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -794,7 +794,7 @@ PushingToLiveViewSink::PushingToLiveViewSink(const Block & header, StorageLiveVi void PushingToLiveViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); - live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + live_view.writeBlock(live_view, getHeader().cloneWithColumns(chunk.getColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); @@ -818,7 +818,7 @@ void PushingToWindowViewSink::consume(Chunk & chunk) { Progress local_progress(chunk.getNumRows(), chunk.bytes(), 0); StorageWindowView::writeIntoWindowView( - window_view, getHeader().cloneWithColumns(chunk.detachColumns()), std::move(chunk.getChunkInfos()), context); + window_view, getHeader().cloneWithColumns(chunk.getColumns()), std::move(chunk.getChunkInfos()), context); if (auto process = context->getProcessListElement()) process->updateProgressIn(local_progress); diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index 221d0b5ce8d..cf1f7054f6c 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -2,13 +2,18 @@ -select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; -optimize_trivial_insert_select=0 max_insert_threads=0 + + + +select 'optimize_trivial_insert_select=0', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=0 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_1', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=0, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -22,17 +27,17 @@ select count() from testX; 200 select count() from testXA; 200 -select count() from testXB; -0 select count() from testXC; 200 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=5'; -optimize_trivial_insert_select=0 max_insert_threads=5 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=5 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_2', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=0, - max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -46,17 +51,17 @@ select count() from testX; 400 select count() from testXA; 400 -select count() from testXB; -0 select count() from testXC; 400 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; -optimize_trivial_insert_select=1 max_insert_threads=0 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=0 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_3', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=1, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -70,17 +75,17 @@ select count() from testX; 600 select count() from testXA; 600 -select count() from testXB; -0 select count() from testXC; 600 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=5'; -optimize_trivial_insert_select=1 max_insert_threads=5 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=5 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_4', log_queries=1, parallel_view_processing=0, optimize_trivial_insert_select=1, - max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -94,17 +99,17 @@ select count() from testX; 800 select count() from testXA; 800 -select count() from testXB; -0 select count() from testXC; 800 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=0'; -optimize_trivial_insert_select=0 max_insert_threads=0 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=0 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_5', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=0, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -118,17 +123,17 @@ select count() from testX; 1000 select count() from testXA; 1000 -select count() from testXB; -0 select count() from testXC; 1000 -select 'optimize_trivial_insert_select=0', 'max_insert_threads=5'; -optimize_trivial_insert_select=0 max_insert_threads=5 +select 'optimize_trivial_insert_select=0', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=0 max_insert_threads=5 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_6', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=0, - max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -139,20 +144,20 @@ select peak_threads_usage from system.query_log where Settings['max_insert_threads'] = '5'; 12 select count() from testX; -1190 +1200 select count() from testXA; -1130 -select count() from testXB; -60 +1200 select count() from testXC; -1130 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=0'; -optimize_trivial_insert_select=1 max_insert_threads=0 +1200 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=0', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=0 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_7', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=1, - max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=0; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -163,20 +168,20 @@ select peak_threads_usage from system.query_log where Settings['max_insert_threads'] = '0'; 2 select count() from testX; -1390 +1400 select count() from testXA; -1330 -select count() from testXB; -60 +1400 select count() from testXC; -1330 -select 'optimize_trivial_insert_select=1', 'max_insert_threads=5'; -optimize_trivial_insert_select=1 max_insert_threads=5 +1400 +select 'optimize_trivial_insert_select=1', 'max_insert_threads=5', 'iteration_num='; +optimize_trivial_insert_select=1 max_insert_threads=5 iteration_num= insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_8', log_queries=1, parallel_view_processing=1, optimize_trivial_insert_select=1, - max_insert_threads=5; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads=5; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -187,10 +192,8 @@ select peak_threads_usage from system.query_log where Settings['max_insert_threads'] = '5'; 7 select count() from testX; -1590 +1600 select count() from testXA; -1480 -select count() from testXB; -160 +1600 select count() from testXC; -1490 +1600 diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 index 4e45c68b5ad..6805b8902ec 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql.j2 +++ b/tests/queries/0_stateless/01275_parallel_mv.sql.j2 @@ -5,11 +5,12 @@ -- avoid settings randomization by clickhouse-test set max_threads = 10; - -- more blocks to process set max_block_size = 10; set min_insert_block_size_rows = 10; +set materialized_views_ignore_errors = 1; + drop table if exists testX; drop table if exists testXA; drop table if exists testXB; @@ -21,17 +22,25 @@ create materialized view testXA engine=MergeTree order by tuple() as select slee create materialized view testXB engine=MergeTree order by tuple() as select sleep(0.2), throwIf(A=1) from testX; create materialized view testXC engine=MergeTree order by tuple() as select sleep(0.1) from testX; + +{% set count = namespace(value=0) %} + -- { echoOn } {% for parallel_view_processing in [0, 1] %} {% for optimize_trivial_insert_select in [0, 1] %} {% for max_insert_threads in [0, 5] %} -select 'optimize_trivial_insert_select={{ optimize_trivial_insert_select }}', 'max_insert_threads={{ max_insert_threads }}'; + +{% set count.value = count.value + 1 %} + +select 'optimize_trivial_insert_select={{ optimize_trivial_insert_select }}', 'max_insert_threads={{ max_insert_threads }}', 'iteration_num={{ iteration_num }}'; insert into testX select number from numbers(200) settings + send_logs_level='fatal', + insert_deduplication_token='UT_{{ count.value }}', log_queries=1, parallel_view_processing={{ parallel_view_processing }}, optimize_trivial_insert_select={{ optimize_trivial_insert_select }}, - max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } + max_insert_threads={{ max_insert_threads }}; system flush logs; select peak_threads_usage from system.query_log where current_database = currentDatabase() and @@ -43,7 +52,6 @@ select peak_threads_usage from system.query_log where select count() from testX; select count() from testXA; -select count() from testXB; select count() from testXC; {% endfor %} {% endfor %} diff --git a/tests/result b/tests/result new file mode 100644 index 00000000000..b76f44f1e6a --- /dev/null +++ b/tests/result @@ -0,0 +1,12 @@ +Using queries from 'queries' directory +Connecting to ClickHouse server...... OK +Connected to server 24.7.1.1 @ 246f421f2402799fd11b22a608b4d0d497cb8438 chesema-processor-onCancel + +Running 1 stateless tests (MainProcess). + +00993_system_parts_race_condition_drop_zookeeper: [ OK ] + +1 tests passed. 0 tests skipped. 124.59 s elapsed (MainProcess). + +0 tests passed. 0 tests skipped. 0.00 s elapsed (MainProcess). +All tests have finished. From f7fc031d9d1d63a057e5597088f1b70bc81f38a6 Mon Sep 17 00:00:00 2001 From: Konstantin Morozov Date: Fri, 12 Jul 2024 10:04:10 +0000 Subject: [PATCH 33/64] fix data race with permanently_detached_tables --- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/DatabaseOrdinary.h | 8 ++++++-- src/Interpreters/loadMetadata.cpp | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 98ef4b26ae2..db812e89782 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -224,10 +224,10 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables if (fs::exists(full_path.string() + detached_suffix)) { const std::string table_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); - permanently_detached_tables.push_back(table_name); LOG_DEBUG(log, "Skipping permanently detached table {}.", backQuote(table_name)); std::lock_guard lock(mutex); + permanently_detached_tables.push_back(table_name); const auto detached_table_name = create_query->getTable(); diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index d2891147922..c2c5775e5ab 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -67,7 +67,11 @@ public: const StorageID & table_id, const StorageInMemoryMetadata & metadata) override; - Strings getNamesOfPermanentlyDetachedTables() const override { return permanently_detached_tables; } + Strings getNamesOfPermanentlyDetachedTables() const override + { + std::lock_guard lock(mutex); + return permanently_detached_tables; + } protected: virtual void commitAlterTable( @@ -77,7 +81,7 @@ protected: const String & statement, ContextPtr query_context); - Strings permanently_detached_tables; + Strings permanently_detached_tables TSA_GUARDED_BY(mutex); std::unordered_map load_table TSA_GUARDED_BY(mutex); std::unordered_map startup_table TSA_GUARDED_BY(mutex); diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 9c3922b8bda..03dd1714ead 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -384,7 +384,7 @@ static void maybeConvertOrdinaryDatabaseToAtomic(ContextMutablePtr context, cons if (database->getEngineName() != "Ordinary") return; - Strings permanently_detached_tables = database->getNamesOfPermanentlyDetachedTables(); + const Strings permanently_detached_tables = database->getNamesOfPermanentlyDetachedTables(); if (!permanently_detached_tables.empty()) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot automatically convert database {} from Ordinary to Atomic, " From 404840bf2c797d8a500d50ad1f2411b330622273 Mon Sep 17 00:00:00 2001 From: Zawa-II Date: Wed, 10 Jul 2024 21:29:09 -0700 Subject: [PATCH 34/64] [ClickHouse-66010] Add-machine_id-to-generateSnowflakeID Extend existing tests and retain expr argument in generateSnowflakeID Commit expected output file Add argument validation in generateSnowflakeID function Style Check Style Check by changing error type Style Check by removing error check Add new test cases for function including parameter Add new test cases for generateSnowflakeID function with machine_id parameter and update reference file Add new test cases for generateSnowflakeID function with machine_id parameter and update reference file to match expected output Fix generateSnowflakeID function to correctly handle optional machine_id parameter, update documentation and examples, and clean up tests. Fix typo in generateSnowflakeID function and correctly handle optional machine_id parameter. --- src/Functions/generateSnowflakeID.cpp | 31 ++++++++++++++----- .../03130_generateSnowflakeId.reference | 4 +++ .../0_stateless/03130_generateSnowflakeId.sql | 9 ++++-- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index a171b6bf86e..6befdf7a86f 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "base/types.h" @@ -165,24 +166,37 @@ public: { FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ - {"expr", nullptr, nullptr, "Arbitrary expression"} + {"expr", nullptr, nullptr, "Arbitrary expression"}, + {"machine_id", static_cast(&isNativeUInt), nullptr, "Optional machine ID in UInt*"} }; validateFunctionArguments(*this, arguments, mandatory_args, optional_args); return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & /*arguments*/, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); + vec_to.resize(input_rows_count); + + uint64_t machine_id = 0; + if (arguments.size() == 2) + machine_id = arguments[1].column->getUInt(0); + + if (machine_id == 0) + machine_id = getMachineId(); + + // Ensure machine_id is within the valid range + machine_id &= (1ull << machine_id_bits_count) - 1; + + // Process expr argument here if necessary (currently a placeholder) if (input_rows_count != 0) { - vec_to.resize(input_rows_count); - Data data; SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range + snowflake_id.machine_id = machine_id; //Assign the provided machine_id for (UInt64 & to_row : vec_to) { @@ -208,10 +222,13 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; - FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])"; - FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; + FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression, [machine_id]])"; + FunctionDocumentation::Arguments arguments = { + {"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}, + {"machine_id", "A machine ID, the 10 least significant bits are used. Optional."} + }; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; - FunctionDocumentation::Examples examples = {{"single", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"multiple", "SELECT generateSnowflakeID(1), generateSnowflakeID(2)", ""}}; + FunctionDocumentation::Examples examples = {{"single", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"with_machine_id", "SELECT generateSnowflakeID(1)", ""}, {"with_machine_id_and_expression", "SELECT generateSnowflakeID('some_expression', 1)", ""}}; FunctionDocumentation::Categories categories = {"Snowflake ID"}; factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 39669d21bee..5cc19c73d88 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,5 +1,9 @@ 1 0 +1 0 1 +1 +1 +1 100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 0717c81aa0d..b28960cfcbf 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,12 +1,15 @@ -- Test SQL function 'generateSnowflakeID' SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero - -SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination -SELECT generateSnowflakeID(1, 2); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs +SELECT generateSnowflakeID(1) != generateSnowflakeID(); -- Check different invocations yield different results + +SELECT generateSnowflakeID('expr', 1) = generateSnowflakeID('expr', 1); -- enabled common subexpression elimination +SELECT generateSnowflakeID('expr', 1) != generateSnowflakeID('expr', 2); -- different machine IDs should produce different results +SELECT generateSnowflakeID('expr', 1) != generateSnowflakeID('different_expr', 1); -- different expressions should bypass common subexpression elimination SELECT count(*) FROM From 0925830f2edbe85a7aac6d45608551c3efb43731 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sat, 13 Jul 2024 14:01:03 -0700 Subject: [PATCH 35/64] Refactored generateSnowflakeID function implementation and added comprehensive SQL tests --- src/Functions/generateSnowflakeID.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 6befdf7a86f..6c5124872a0 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -181,22 +181,25 @@ public: vec_to.resize(input_rows_count); uint64_t machine_id = 0; - if (arguments.size() == 2) - machine_id = arguments[1].column->getUInt(0); + if (arguments.size() == 2 && input_rows_count > 0) + { + const auto & column = arguments[1].column; + if (column && !column->empty()) + machine_id = column->getUInt(0); + } if (machine_id == 0) machine_id = getMachineId(); - // Ensure machine_id is within the valid range + /// Ensure machine_id is within the valid range machine_id &= (1ull << machine_id_bits_count) - 1; - // Process expr argument here if necessary (currently a placeholder) + /// Process expr argument here if necessary (currently a placeholder) - if (input_rows_count != 0) + if (input_rows_count > 0) { Data data; - SnowflakeId snowflake_id = data.reserveRange(input_rows_count); /// returns begin of available snowflake ids range - snowflake_id.machine_id = machine_id; //Assign the provided machine_id + SnowflakeId snowflake_id = data.reserveRange(input_rows_count, machine_id); for (UInt64 & to_row : vec_to) { From 08cce45258b133b2b44c8dd4c3a9c3fe70551e7e Mon Sep 17 00:00:00 2001 From: Haydn Date: Sat, 13 Jul 2024 14:25:42 -0700 Subject: [PATCH 36/64] Fix handling of machine_id in generateSnowflakeID to read conditionally and assign efficiently --- src/Functions/generateSnowflakeID.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 6c5124872a0..e58d7cfd2d6 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -199,7 +199,8 @@ public: if (input_rows_count > 0) { Data data; - SnowflakeId snowflake_id = data.reserveRange(input_rows_count, machine_id); + SnowflakeId snowflake_id = data.reserveRange(input_rows_count); + snowflake_id.machine_id = machine_id; for (UInt64 & to_row : vec_to) { From 5826f1f073f971031a681637491d14934612b4c4 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sat, 13 Jul 2024 15:39:48 -0700 Subject: [PATCH 37/64] [ClickHouse-66010] Add machine_id parameter to generateSnowflakeID function and update tests --- src/Functions/generateSnowflakeID.cpp | 28 +++++++++++---------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index e58d7cfd2d6..ca639d65cc5 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -8,7 +8,6 @@ #include #include "base/types.h" - namespace DB { @@ -38,7 +37,7 @@ constexpr auto machine_seq_num_bits_count = 12; /// bits masks for Snowflake ID components constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count; -constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1; +constexpr uint64_t machine_seq_num_mask = (1ull << machine seq_num_bits_count) - 1; /// max values constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; @@ -97,10 +96,10 @@ struct SnowflakeIdRange /// 1. calculate Snowflake ID by current timestamp (`now`) /// 2. `begin = max(available, now)` /// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow -SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count) +SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count, uint64_t machine_id) { /// 1. `now` - SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = getMachineId(), .machine_seq_num = 0}; + SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = machine_id, .machine_seq_num = 0}; /// 2. `begin` if (begin.timestamp <= available.timestamp) @@ -129,13 +128,13 @@ struct Data /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. static inline std::atomic lowest_available_snowflake_id = 0; - SnowflakeId reserveRange(size_t input_rows_count) + SnowflakeId reserveRange(size_t input_rows_count, uint64_t machine_id) { uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); SnowflakeIdRange range; do { - range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count); + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count, machine_id); } while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); /// CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try @@ -167,7 +166,7 @@ public: FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"}, - {"machine_id", static_cast(&isNativeUInt), nullptr, "Optional machine ID in UInt*"} + {"machine_id", static_cast(&isNativeUInt), nullptr, "UInt*"} }; validateFunctionArguments(*this, arguments, mandatory_args, optional_args); @@ -183,24 +182,19 @@ public: uint64_t machine_id = 0; if (arguments.size() == 2 && input_rows_count > 0) { - const auto & column = arguments[1].column; - if (column && !column->empty()) - machine_id = column->getUInt(0); + const auto & col_machine_id = arguments[1].column; + machine_id = col_machine_id->getUInt(0); } - - if (machine_id == 0) + else machine_id = getMachineId(); - /// Ensure machine_id is within the valid range + /// Truncate machine id to 10 bits machine_id &= (1ull << machine_id_bits_count) - 1; - /// Process expr argument here if necessary (currently a placeholder) - if (input_rows_count > 0) { Data data; - SnowflakeId snowflake_id = data.reserveRange(input_rows_count); - snowflake_id.machine_id = machine_id; + SnowflakeId snowflake_id = data.reserveRange(input_rows_count, machine_id); for (UInt64 & to_row : vec_to) { From 6ed138b1a41b94da4daa456535da719af56c36f7 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sat, 13 Jul 2024 16:06:31 -0700 Subject: [PATCH 38/64] Fix syntax error in generateSnowflakeID.cpp --- src/Functions/generateSnowflakeID.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index ca639d65cc5..3aaa0b50a75 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -37,7 +37,7 @@ constexpr auto machine_seq_num_bits_count = 12; /// bits masks for Snowflake ID components constexpr uint64_t machine_id_mask = ((1ull << machine_id_bits_count) - 1) << machine_seq_num_bits_count; -constexpr uint64_t machine_seq_num_mask = (1ull << machine seq_num_bits_count) - 1; +constexpr uint64_t machine_seq_num_mask = (1ull << machine_seq_num_bits_count) - 1; /// max values constexpr uint64_t max_machine_seq_num = machine_seq_num_mask; From ace4aa36bf0e3762711616a49d217929e2c808a0 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sun, 14 Jul 2024 09:41:24 -0700 Subject: [PATCH 39/64] Fixed generateSnowflakeID function and tests according Update uuid-functions.md Adjust comments in unit tests --- .../sql-reference/functions/uuid-functions.md | 14 ++++++++++++- src/Functions/generateSnowflakeID.cpp | 20 +++++++++---------- .../03130_generateSnowflakeId.reference | 5 ++--- .../0_stateless/03130_generateSnowflakeId.sql | 11 ++++++---- 4 files changed, 32 insertions(+), 18 deletions(-) diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index e990023efbc..f1abb90b455 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -567,13 +567,15 @@ While no standard or recommendation exists for the epoch of Snowflake IDs, imple **Syntax** ``` sql -generateSnowflakeID([expr]) +generateSnowflakeID([expr, [machine_id]]) ``` **Arguments** - `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. +- `machine_id` — A machine ID, the 10 least significant bits are used. Optional. + **Returned value** A value of type UInt64. @@ -608,6 +610,16 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); └────────────────────────┴────────────────────────┘ ``` +Example with expression and machine ID +``` +SELECT generateSnowflakeID('expr', 1); + +┌─generateSnowflakeID('expr', 1)─┐ +│ 7201148511606784002 │ +└────────────────────────────────┘ +``` + + ## snowflakeToDateTime :::warning diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 3aaa0b50a75..b2f764d9d56 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -96,7 +96,8 @@ struct SnowflakeIdRange /// 1. calculate Snowflake ID by current timestamp (`now`) /// 2. `begin = max(available, now)` /// 3. Calculate `end = begin + input_rows_count` handling `machine_seq_num` overflow -SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, size_t input_rows_count, uint64_t machine_id) +SnowflakeIdRange getRangeOfAvailableIds(const SnowflakeId & available, uint64_t machine_id, size_t input_rows_count) + { /// 1. `now` SnowflakeId begin = {.timestamp = getTimestamp(), .machine_id = machine_id, .machine_seq_num = 0}; @@ -134,7 +135,7 @@ struct Data SnowflakeIdRange range; do { - range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), input_rows_count, machine_id); + range = getRangeOfAvailableIds(toSnowflakeId(available_snowflake_id), machine_id, input_rows_count); } while (!lowest_available_snowflake_id.compare_exchange_weak(available_snowflake_id, fromSnowflakeId(range.end))); /// CAS failed --> another thread updated `lowest_available_snowflake_id` and we re-try @@ -166,7 +167,9 @@ public: FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"}, - {"machine_id", static_cast(&isNativeUInt), nullptr, "UInt*"} + {"machine_id", static_cast(&isNativeUInt), static_cast(&isColumnConst), "UInt*"} + {"machine_id", static_cast(&isNativeUInt), static_cast(&isColumnConst), "const UInt*"} + }; validateFunctionArguments(*this, arguments, mandatory_args, optional_args); @@ -179,17 +182,14 @@ public: typename ColumnVector::Container & vec_to = col_res->getData(); vec_to.resize(input_rows_count); - uint64_t machine_id = 0; + uint64_t machine_id = getMachineId(); if (arguments.size() == 2 && input_rows_count > 0) { const auto & col_machine_id = arguments[1].column; machine_id = col_machine_id->getUInt(0); + /// Truncate machine id to 10 bits + machine_id &= (1ull << machine_id_bits_count) - 1; } - else - machine_id = getMachineId(); - - /// Truncate machine id to 10 bits - machine_id &= (1ull << machine_id_bits_count) - 1; if (input_rows_count > 0) { @@ -226,7 +226,7 @@ REGISTER_FUNCTION(GenerateSnowflakeID) {"machine_id", "A machine ID, the 10 least significant bits are used. Optional."} }; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; - FunctionDocumentation::Examples examples = {{"single", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"with_machine_id", "SELECT generateSnowflakeID(1)", ""}, {"with_machine_id_and_expression", "SELECT generateSnowflakeID('some_expression', 1)", ""}}; + FunctionDocumentation::Examples examples = {{"no_arguments", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"with_machine_id", "SELECT generateSnowflakeID(1)", "7201148511606784001"}, {"with_expression_and_machine_id", "SELECT generateSnowflakeID('some_expression', 1)", "7201148511606784002"}}; FunctionDocumentation::Categories categories = {"Snowflake ID"}; factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 5cc19c73d88..35592351ac6 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,9 +1,8 @@ 1 -0 1 0 1 1 +0 1 -1 -100 +100 \ No newline at end of file diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index b28960cfcbf..962faec46a8 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -2,14 +2,17 @@ SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ -SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination - SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs -SELECT generateSnowflakeID(1) != generateSnowflakeID(); -- Check different invocations yield different results +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination SELECT generateSnowflakeID('expr', 1) = generateSnowflakeID('expr', 1); -- enabled common subexpression elimination SELECT generateSnowflakeID('expr', 1) != generateSnowflakeID('expr', 2); -- different machine IDs should produce different results -SELECT generateSnowflakeID('expr', 1) != generateSnowflakeID('different_expr', 1); -- different expressions should bypass common subexpression elimination + +SELECT bitAnd(generateSnowflakeID(1023), 1023) = 1023; -- check if the last 10 bits match the machine ID + +SELECT generateSnowflakeID('invalid_machine_id'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +SELECT generateSnowflakeID(materialize(toUInt64(1))) = generateSnowflakeID(materialize(toUInt64(1))); -- -- test with non-const machine ID SELECT count(*) FROM From 27273be84f3d252562646384d8db1aab6206d4a0 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sun, 14 Jul 2024 09:51:13 -0700 Subject: [PATCH 40/64] Removed Duplicte args specifications --- src/Functions/generateSnowflakeID.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index b2f764d9d56..ada677a9355 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -167,9 +167,7 @@ public: FunctionArgumentDescriptors mandatory_args; FunctionArgumentDescriptors optional_args{ {"expr", nullptr, nullptr, "Arbitrary expression"}, - {"machine_id", static_cast(&isNativeUInt), static_cast(&isColumnConst), "UInt*"} {"machine_id", static_cast(&isNativeUInt), static_cast(&isColumnConst), "const UInt*"} - }; validateFunctionArguments(*this, arguments, mandatory_args, optional_args); From 92b2a7d33d153cafd7dc6eb17cb0d2501d886245 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sun, 14 Jul 2024 10:34:39 -0700 Subject: [PATCH 41/64] Updated generateSnowflakeID tests and reference outputs --- src/Functions/generateSnowflakeID.cpp | 6 ++++++ .../queries/0_stateless/03130_generateSnowflakeId.reference | 3 ++- tests/queries/0_stateless/03130_generateSnowflakeId.sql | 2 +- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index ada677a9355..6ee61df7e3b 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -184,7 +184,13 @@ public: if (arguments.size() == 2 && input_rows_count > 0) { const auto & col_machine_id = arguments[1].column; + if (!isColumnConst(*col_machine_id)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The machine_id argument must be a constant UInt64"); + machine_id = col_machine_id->getUInt(0); + if (col_machine_id->getDataType() != TypeIndex::UInt64) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The machine_id argument must be of type UInt64"); + /// Truncate machine id to 10 bits machine_id &= (1ull << machine_id_bits_count) - 1; } diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index 35592351ac6..dff613b29d3 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -5,4 +5,5 @@ 1 0 1 -100 \ No newline at end of file +1 +100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index 962faec46a8..be49f678dca 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -12,7 +12,7 @@ SELECT bitAnd(generateSnowflakeID(1023), 1023) = 1023; -- check if the last 10 b SELECT generateSnowflakeID('invalid_machine_id'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } -SELECT generateSnowflakeID(materialize(toUInt64(1))) = generateSnowflakeID(materialize(toUInt64(1))); -- -- test with non-const machine ID +SELECT generateSnowflakeID(materialize(toUInt64(1))) = generateSnowflakeID(materialize(toUInt64(1))); -- Test with non-const machine ID SELECT count(*) FROM From 5ea81366a780557f11459573e24b774494145da7 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sun, 14 Jul 2024 10:55:57 -0700 Subject: [PATCH 42/64] Fix generateSnowflakeID function to handle invalid inputs gracefully and avoid output for invalid cases --- src/Functions/generateSnowflakeID.cpp | 15 +++++++++------ .../03130_generateSnowflakeId.reference | 1 - .../0_stateless/03130_generateSnowflakeId.sql | 4 ++-- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 6ee61df7e3b..2db40b10496 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -18,7 +18,7 @@ namespace https://en.wikipedia.org/wiki/Snowflake_ID 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ |0| timestamp | ├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ @@ -184,13 +184,16 @@ public: if (arguments.size() == 2 && input_rows_count > 0) { const auto & col_machine_id = arguments[1].column; - if (!isColumnConst(*col_machine_id)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The machine_id argument must be a constant UInt64"); + + // Check if the provided machine_id is a constant UInt64 + if (!isColumnConst(*col_machine_id) || !isNativeUInt(col_machine_id->getDataType())) + { + // If invalid machine_id, resize vector to 0 + vec_to.resize(0); + return col_res; + } machine_id = col_machine_id->getUInt(0); - if (col_machine_id->getDataType() != TypeIndex::UInt64) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The machine_id argument must be of type UInt64"); - /// Truncate machine id to 10 bits machine_id &= (1ull << machine_id_bits_count) - 1; } diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index dff613b29d3..b8a5829fed7 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -5,5 +5,4 @@ 1 0 1 -1 100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index be49f678dca..c641cfd4bf1 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -10,9 +10,9 @@ SELECT generateSnowflakeID('expr', 1) != generateSnowflakeID('expr', 2); -- diff SELECT bitAnd(generateSnowflakeID(1023), 1023) = 1023; -- check if the last 10 bits match the machine ID -SELECT generateSnowflakeID('invalid_machine_id'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT generateSnowflakeID('invalid_machine_id'); -- no output for invalid type -SELECT generateSnowflakeID(materialize(toUInt64(1))) = generateSnowflakeID(materialize(toUInt64(1))); -- Test with non-const machine ID +SELECT generateSnowflakeID(materialize(toUInt64(1))); -- no output for non-const machine ID SELECT count(*) FROM From 1c868356b8a1ce6e698547f7c1d2b19b2d14c095 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sun, 14 Jul 2024 11:38:11 -0700 Subject: [PATCH 43/64] Fix generateSnowflakeID function to handle invalid inputs gracefully and update test queries --- src/Functions/generateSnowflakeID.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 2db40b10496..812c992203d 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -184,15 +184,12 @@ public: if (arguments.size() == 2 && input_rows_count > 0) { const auto & col_machine_id = arguments[1].column; - - // Check if the provided machine_id is a constant UInt64 - if (!isColumnConst(*col_machine_id) || !isNativeUInt(col_machine_id->getDataType())) + if (!isColumnConst(*col_machine_id)) { - // If invalid machine_id, resize vector to 0 - vec_to.resize(0); + // Return an empty column if machine ID is not constant + vec_to.clear(); return col_res; } - machine_id = col_machine_id->getUInt(0); /// Truncate machine id to 10 bits machine_id &= (1ull << machine_id_bits_count) - 1; From 619152eadb702b23a439517e0ab0e848d1ade466 Mon Sep 17 00:00:00 2001 From: Haydn Date: Sun, 14 Jul 2024 14:13:01 -0700 Subject: [PATCH 44/64] Retry with returning nullptr --- src/Functions/generateSnowflakeID.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 812c992203d..3b68e634708 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -185,11 +185,7 @@ public: { const auto & col_machine_id = arguments[1].column; if (!isColumnConst(*col_machine_id)) - { - // Return an empty column if machine ID is not constant - vec_to.clear(); - return col_res; - } + return nullptr; machine_id = col_machine_id->getUInt(0); /// Truncate machine id to 10 bits machine_id &= (1ull << machine_id_bits_count) - 1; From 41327260bad798494fef0914b51d95fa85043057 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 15 Jul 2024 03:47:35 +0200 Subject: [PATCH 45/64] Fix MSan report in GRPC --- contrib/grpc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/grpc b/contrib/grpc index f5b7fdc2dff..1716359d2e2 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit f5b7fdc2dff09ada06dbf6c75df298fb40f898df +Subproject commit 1716359d2e28d304a250f9df0e6c0ccad03de8db From 5b0bcf79f024514fbc2e2d7850d9eb1fdfd25e64 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 15 Jul 2024 06:39:33 +0000 Subject: [PATCH 46/64] Some fixups --- .../sql-reference/functions/uuid-functions.md | 7 ++--- src/Functions/generateSnowflakeID.cpp | 29 +++++++++---------- .../03130_generateSnowflakeId.reference | 9 ++++-- .../0_stateless/03130_generateSnowflakeId.sql | 25 +++++++++------- 4 files changed, 36 insertions(+), 34 deletions(-) diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index f1abb90b455..0bde207dcc9 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -573,8 +573,7 @@ generateSnowflakeID([expr, [machine_id]]) **Arguments** - `expr` — An arbitrary [expression](../../sql-reference/syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned Snowflake ID. Optional. - -- `machine_id` — A machine ID, the 10 least significant bits are used. Optional. +- `machine_id` — A machine ID, the lowest 10 bits are used. [Int64](../data-types/int-uint.md). Optional. **Returned value** @@ -610,7 +609,8 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); └────────────────────────┴────────────────────────┘ ``` -Example with expression and machine ID +**Example with expression and a machine ID** + ``` SELECT generateSnowflakeID('expr', 1); @@ -619,7 +619,6 @@ SELECT generateSnowflakeID('expr', 1); └────────────────────────────────┘ ``` - ## snowflakeToDateTime :::warning diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index 3b68e634708..c95e3edf4ca 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -18,7 +18,7 @@ namespace https://en.wikipedia.org/wiki/Snowflake_ID 0 1 2 3 - 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 ├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ |0| timestamp | ├─┼ ┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ @@ -129,7 +129,7 @@ struct Data /// Guarantee counter monotonicity within one timestamp across all threads generating Snowflake IDs simultaneously. static inline std::atomic lowest_available_snowflake_id = 0; - SnowflakeId reserveRange(size_t input_rows_count, uint64_t machine_id) + SnowflakeId reserveRange(uint64_t machine_id, size_t input_rows_count) { uint64_t available_snowflake_id = lowest_available_snowflake_id.load(); SnowflakeIdRange range; @@ -178,23 +178,20 @@ public: { auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_to = col_res->getData(); - vec_to.resize(input_rows_count); - - uint64_t machine_id = getMachineId(); - if (arguments.size() == 2 && input_rows_count > 0) - { - const auto & col_machine_id = arguments[1].column; - if (!isColumnConst(*col_machine_id)) - return nullptr; - machine_id = col_machine_id->getUInt(0); - /// Truncate machine id to 10 bits - machine_id &= (1ull << machine_id_bits_count) - 1; - } if (input_rows_count > 0) { + vec_to.resize(input_rows_count); + + uint64_t machine_id = getMachineId(); + if (arguments.size() == 2) + { + machine_id = arguments[1].column->getUInt(0); + machine_id &= (1ull << machine_id_bits_count) - 1; + } + Data data; - SnowflakeId snowflake_id = data.reserveRange(input_rows_count, machine_id); + SnowflakeId snowflake_id = data.reserveRange(machine_id, input_rows_count); for (UInt64 & to_row : vec_to) { @@ -223,7 +220,7 @@ REGISTER_FUNCTION(GenerateSnowflakeID) FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression, [machine_id]])"; FunctionDocumentation::Arguments arguments = { {"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}, - {"machine_id", "A machine ID, the 10 least significant bits are used. Optional."} + {"machine_id", "A machine ID, the lowest 10 bits are used. Optional."} }; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; FunctionDocumentation::Examples examples = {{"no_arguments", "SELECT generateSnowflakeID()", "7201148511606784000"}, {"with_machine_id", "SELECT generateSnowflakeID(1)", "7201148511606784001"}, {"with_expression_and_machine_id", "SELECT generateSnowflakeID('some_expression', 1)", "7201148511606784002"}}; diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.reference b/tests/queries/0_stateless/03130_generateSnowflakeId.reference index b8a5829fed7..fd264f00d36 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.reference +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.reference @@ -1,8 +1,11 @@ +Negative tests +The first bit must be zero 1 -1 +Test disabling of common subexpression elimination via first parameter +0 0 1 +Test user-provided machine ID 1 -0 -1 +Generated Snowflake IDs are unique 100 diff --git a/tests/queries/0_stateless/03130_generateSnowflakeId.sql b/tests/queries/0_stateless/03130_generateSnowflakeId.sql index c641cfd4bf1..6c0a6cc0f2c 100644 --- a/tests/queries/0_stateless/03130_generateSnowflakeId.sql +++ b/tests/queries/0_stateless/03130_generateSnowflakeId.sql @@ -1,19 +1,22 @@ -- Test SQL function 'generateSnowflakeID' -SELECT bitAnd(bitShiftRight(toUInt64(generateSnowflakeID()), 63), 1) = 0; -- check first bit is zero -SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ +SELECT 'Negative tests'; +SELECT generateSnowflakeID(1, 2, 3); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT generateSnowflakeID(1, 'not_an_int'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT generateSnowflakeID(1, materialize(2)); -- { serverError ILLEGAL_COLUMN } + + SELECT 'The first bit must be zero'; +SELECT bitAnd(bitShiftRight(generateSnowflakeID(), 63), 1) = 0; + +SELECT 'Test disabling of common subexpression elimination via first parameter'; SELECT generateSnowflakeID(1) = generateSnowflakeID(2); -- disabled common subexpression elimination --> lhs != rhs -SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- enabled common subexpression elimination +SELECT generateSnowflakeID() = generateSnowflakeID(1); -- same as ^^ +SELECT generateSnowflakeID(1) = generateSnowflakeID(1); -- with common subexpression elimination -SELECT generateSnowflakeID('expr', 1) = generateSnowflakeID('expr', 1); -- enabled common subexpression elimination -SELECT generateSnowflakeID('expr', 1) != generateSnowflakeID('expr', 2); -- different machine IDs should produce different results - -SELECT bitAnd(generateSnowflakeID(1023), 1023) = 1023; -- check if the last 10 bits match the machine ID - -SELECT generateSnowflakeID('invalid_machine_id'); -- no output for invalid type - -SELECT generateSnowflakeID(materialize(toUInt64(1))); -- no output for non-const machine ID +SELECT 'Test user-provided machine ID'; +SELECT bitAnd(bitShiftRight(generateSnowflakeID(1, 123), 12), 1024 - 1) = 123; -- the machine id is actually set in the generated snowflake ID (1024 = 2^10) +SELECT 'Generated Snowflake IDs are unique'; SELECT count(*) FROM ( From 0dfcaaa28fd4717fcf217d771333077e86401529 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 15 Jul 2024 13:05:53 +0200 Subject: [PATCH 47/64] work with review notes --- src/Interpreters/Squashing.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index a076494a4bc..5fe0ba9a737 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -18,7 +18,6 @@ Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_bloc , min_block_size_bytes(min_block_size_bytes_) , header(header_) { - LOG_TEST(getLogger("Squashing"), "header columns {}", header.columns()); } Chunk Squashing::flush() @@ -46,8 +45,6 @@ Chunk Squashing::squash(Chunk && input_chunk) Chunk Squashing::add(Chunk && input_chunk) { - LOG_TEST(getLogger("Squashing"), "add columns {} rows {}", input_chunk.getNumColumns(), input_chunk.getNumRows()); - if (!input_chunk) return {}; @@ -88,8 +85,6 @@ Chunk Squashing::add(Chunk && input_chunk) Chunk Squashing::convertToChunk(CurrentData && data) const { - LOG_TEST(getLogger("Squashing"), "convertToChunk {}", data.chunks.size()); - if (data.chunks.empty()) return {}; @@ -99,7 +94,10 @@ Chunk Squashing::convertToChunk(CurrentData && data) const // It is imortant that chunk is not empty, it has to have columns even if they are empty // Sometimes there are could be no columns in header but not empty rows in chunks // That happens when we intend to add defaults for the missing columns after - auto aggr_chunk = Chunk(header.getColumns(), header.columns() ? 0 : data.getRows()); + auto aggr_chunk = Chunk(header.getColumns(), 0); + if (header.columns() == 0) + aggr_chunk = Chunk(header.getColumns(), data.getRows()); + aggr_chunk.getChunkInfos().add(std::move(info)); chassert(aggr_chunk); return aggr_chunk; From bcd08b89531ba0734a689be64f41b46064f0f8b9 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 15 Jul 2024 12:50:53 +0000 Subject: [PATCH 48/64] Automatic style fix --- tests/queries/0_stateless/03008_deduplication.python | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03008_deduplication.python b/tests/queries/0_stateless/03008_deduplication.python index dd1058518c9..bfaa496805f 100644 --- a/tests/queries/0_stateless/03008_deduplication.python +++ b/tests/queries/0_stateless/03008_deduplication.python @@ -83,9 +83,11 @@ def instance_insert_statement( template, table_name=table_name, count=count, - insert_columns="'src_4', 4" - if not insert_unique_blocks - else "'src_' || toString(number), number", + insert_columns=( + "'src_4', 4" + if not insert_unique_blocks + else "'src_' || toString(number), number" + ), insert_settings=insert_settings, ) From 38027abbecfc7b60594ed1073ce9104133152aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 8 Jul 2024 13:06:50 +0000 Subject: [PATCH 49/64] Fix bug in `numbers` when both limit and offset is used but the index cannot be used --- src/Storages/System/StorageSystemNumbers.cpp | 5 +++++ .../03203_system_numbers_limit_and_offset.reference | 3 +++ .../0_stateless/03203_system_numbers_limit_and_offset.sql | 5 +++++ 3 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index da700a7a4e9..6ddfb5e3213 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -38,6 +38,11 @@ void StorageSystemNumbers::read( size_t max_block_size, size_t num_streams) { + if (limit && *limit < max_block_size) + { + max_block_size = static_cast(*limit); + multithreaded = false; + } query_plan.addStep(std::make_unique( column_names, query_info, storage_snapshot, context, shared_from_this(), max_block_size, num_streams)); } diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference new file mode 100644 index 00000000000..d55fa7c71fc --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference @@ -0,0 +1,3 @@ +9900 +9910 +9920 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql new file mode 100644 index 00000000000..6b7f998c4d4 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -0,0 +1,5 @@ +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 990, 3; + From a177bb63077353e15f6780ae775c9519f2795695 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 8 Jul 2024 16:14:20 +0000 Subject: [PATCH 50/64] Really fix the bug --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 35 +++++++------------ src/Storages/System/StorageSystemNumbers.cpp | 5 --- ..._system_numbers_limit_and_offset.reference | 4 +++ .../03203_system_numbers_limit_and_offset.sql | 34 +++++++++++++++++- 4 files changed, 49 insertions(+), 29 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index a3ae035afdd..049fe762640 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -39,15 +39,10 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step class NumbersSource : public ISource { public: - NumbersSource(UInt64 block_size_, UInt64 offset_, std::optional limit_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_) - : ISource(createHeader(column_name)) - , block_size(block_size_) - , next(offset_) - , chunk_step(chunk_step_) - , step(step_) + NumbersSource( + UInt64 block_size_, UInt64 offset_, std::optional end_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_) + : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), chunk_step(chunk_step_), end(end_), step(step_) { - if (limit_.has_value()) - end = limit_.value() + offset_; } String getName() const override { return "Numbers"; } @@ -79,7 +74,6 @@ protected: next += chunk_step; progress(column->size(), column->byteSize()); - return {Columns{std::move(column)}, real_block_size}; } @@ -549,13 +543,21 @@ Pipe ReadFromSystemNumbersStep::makePipe() return pipe; } + const auto end = std::invoke( + [&]() -> std::optional + { + if (numbers_storage.limit.has_value()) + return *(numbers_storage.limit) + numbers_storage.offset; + return {}; + }); + /// Fall back to NumbersSource for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared( max_block_size, numbers_storage.offset + i * max_block_size * numbers_storage.step, - numbers_storage.limit, + end, num_streams * max_block_size * numbers_storage.step, numbers_storage.column_name, numbers_storage.step); @@ -571,19 +573,6 @@ Pipe ReadFromSystemNumbersStep::makePipe() pipe.addSource(std::move(source)); } - if (numbers_storage.limit) - { - size_t i = 0; - auto storage_limit = (*numbers_storage.limit - 1) / numbers_storage.step + 1; - /// This formula is how to split 'limit' elements to 'num_streams' chunks almost uniformly. - pipe.addSimpleTransform( - [&](const Block & header) - { - ++i; - return std::make_shared(header, storage_limit * i / num_streams - storage_limit * (i - 1) / num_streams, 0); - }); - } - return pipe; } diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 6ddfb5e3213..da700a7a4e9 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -38,11 +38,6 @@ void StorageSystemNumbers::read( size_t max_block_size, size_t num_streams) { - if (limit && *limit < max_block_size) - { - max_block_size = static_cast(*limit); - multithreaded = false; - } query_plan.addStep(std::make_unique( column_names, query_info, storage_snapshot, context, shared_from_this(), max_block_size, num_streams)); } diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference index d55fa7c71fc..05deae711c6 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference @@ -1,3 +1,7 @@ +case 1 9900 9910 9920 +case 2 +9990 +18679 31 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql index 6b7f998c4d4..1a1133c8a7f 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -1,5 +1,37 @@ +SELECT 'case 1'; SELECT number FROM numbers_mt(10000) WHERE (number % 10) = 0 ORDER BY number ASC LIMIT 990, 3; - +SELECT 'case 2'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 999, 20 SETTINGS max_block_size = 31; +--- The following query was buggy before, so let's use it as a test case +WITH + toUInt64(-1) AS umax, + toUInt8(ceil(log10(umax))) AS max_digits, + 9 * max_digits AS max_digits_sum, + (x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0))) AS is_prime_slow +SELECT + num, + ds +FROM +( + WITH x -> arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(x)))) AS digits_sum + SELECT + 1 + (number * 2) AS num, + digits_sum(num) AS ds + FROM numbers_mt(10000) + WHERE ds IN ( + WITH x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0)) AS is_prime_slow + SELECT number + FROM numbers(max_digits_sum + 1) + WHERE is_prime_slow(number) + ) +) +WHERE is_prime_slow(num) +ORDER BY num ASC +LIMIT 998, 1 +SETTINGS max_block_size = 64, max_threads=16; From 8b78cf1c6139931132ff84bdf1ae495eef9714be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 8 Jul 2024 16:22:12 +0000 Subject: [PATCH 51/64] Add one more test case --- .../03203_system_numbers_limit_and_offset.reference | 12 ++++++++++++ .../03203_system_numbers_limit_and_offset.sql | 8 ++++++++ 2 files changed, 20 insertions(+) diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference index 05deae711c6..e95864a6fc9 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference @@ -5,3 +5,15 @@ case 1 case 2 9990 18679 31 +0 +10 +20 +30 +40 +50 +60 +70 +80 +90 +100 +110 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql index 1a1133c8a7f..dc9e06b806f 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -3,11 +3,13 @@ SELECT number FROM numbers_mt(10000) WHERE (number % 10) = 0 ORDER BY number ASC LIMIT 990, 3; + SELECT 'case 2'; SELECT number FROM numbers_mt(10000) WHERE (number % 10) = 0 ORDER BY number ASC LIMIT 999, 20 SETTINGS max_block_size = 31; + --- The following query was buggy before, so let's use it as a test case WITH toUInt64(-1) AS umax, @@ -35,3 +37,9 @@ WHERE is_prime_slow(num) ORDER BY num ASC LIMIT 998, 1 SETTINGS max_block_size = 64, max_threads=16; + +SELECT number +FROM numbers_mt(120) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11 From 2442473b253c4d8cf8f4474065b196bd4be4782c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 11:12:28 +0000 Subject: [PATCH 52/64] Fix `generate_series` --- .../QueryPlan/ReadFromSystemNumbersStep.cpp | 50 +++++++++++++++---- .../02970_generate_series.reference | 1 + .../0_stateless/02970_generate_series.sql | 1 + 3 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 049fe762640..67c228d43f4 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -36,12 +36,32 @@ inline void iotaWithStepOptimized(T * begin, size_t count, T first_value, T step iotaWithStep(begin, count, first_value, step); } +/// The range is defined as [start, end) +UInt64 itemCountInRange(UInt64 start, UInt64 end, UInt64 step) +{ + const auto range_count = end - start; + if (step == 1) + return range_count; + + return (range_count - 1) / step + 1; +} + class NumbersSource : public ISource { public: NumbersSource( - UInt64 block_size_, UInt64 offset_, std::optional end_, UInt64 chunk_step_, const std::string & column_name, UInt64 step_) - : ISource(createHeader(column_name)), block_size(block_size_), next(offset_), chunk_step(chunk_step_), end(end_), step(step_) + UInt64 block_size_, + UInt64 offset_, + std::optional end_, + const std::string & column_name, + UInt64 step_in_chunk_, + UInt64 step_between_chunks_) + : ISource(createHeader(column_name)) + , block_size(block_size_) + , next(offset_) + , end(end_) + , step_in_chunk(step_in_chunk_) + , step_between_chunks(step_between_chunks_) { } String getName() const override { return "Numbers"; } @@ -59,7 +79,10 @@ protected: { if (end.value() <= next) return {}; - real_block_size = std::min(block_size, end.value() - next); + + auto max_items_to_generate = itemCountInRange(next, *end, step_in_chunk); + + real_block_size = std::min(block_size, max_items_to_generate); } auto column = ColumnUInt64::create(real_block_size); ColumnUInt64::Container & vec = column->getData(); @@ -69,9 +92,9 @@ protected: UInt64 * current_end = &vec[real_block_size]; - iotaWithStepOptimized(pos, static_cast(current_end - pos), curr, step); + iotaWithStepOptimized(pos, static_cast(current_end - pos), curr, step_in_chunk); - next += chunk_step; + next += step_between_chunks; progress(column->size(), column->byteSize()); return {Columns{std::move(column)}, real_block_size}; @@ -80,9 +103,9 @@ protected: private: UInt64 block_size; UInt64 next; - UInt64 chunk_step; std::optional end; /// not included - UInt64 step; + UInt64 step_in_chunk; + UInt64 step_between_chunks; }; struct RangeWithStep @@ -552,19 +575,24 @@ Pipe ReadFromSystemNumbersStep::makePipe() }); /// Fall back to NumbersSource + /// Range in a single block + const auto block_range = max_block_size * numbers_storage.step; + /// Step between chunks in a single source. + /// It is bigger than block_range in case of multiple threads, because we have to account for other sources as well. + const auto step_between_chunks = num_streams * block_range; for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared( max_block_size, - numbers_storage.offset + i * max_block_size * numbers_storage.step, + numbers_storage.offset + i * block_range, end, - num_streams * max_block_size * numbers_storage.step, numbers_storage.column_name, - numbers_storage.step); + numbers_storage.step, + step_between_chunks); if (numbers_storage.limit && i == 0) { - auto rows_appr = (*numbers_storage.limit - 1) / numbers_storage.step + 1; + auto rows_appr = itemCountInRange(numbers_storage.offset, *numbers_storage.limit, numbers_storage.step); if (limit > 0 && limit < rows_appr) rows_appr = query_info_limit; source->addTotalRowsApprox(rows_appr); diff --git a/tests/queries/0_stateless/02970_generate_series.reference b/tests/queries/0_stateless/02970_generate_series.reference index 6e6f3c81587..f8a8e1891b7 100644 --- a/tests/queries/0_stateless/02970_generate_series.reference +++ b/tests/queries/0_stateless/02970_generate_series.reference @@ -5,6 +5,7 @@ 501 50 17928 +17928 0 10 13 diff --git a/tests/queries/0_stateless/02970_generate_series.sql b/tests/queries/0_stateless/02970_generate_series.sql index 0844714b3a6..edae884a561 100644 --- a/tests/queries/0_stateless/02970_generate_series.sql +++ b/tests/queries/0_stateless/02970_generate_series.sql @@ -5,6 +5,7 @@ SELECT count() FROM generate_series(7, 77, 10); SELECT count() FROM generate_series(0, 1000, 2); SELECT count() FROM generate_series(0, 999, 20); SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1; +SELECT sum(generate_series) FROM generate_series(4, 1008, 4) WHERE generate_series % 7 = 1 SETTINGS max_block_size = 71; SELECT * FROM generate_series(5, 4); SELECT * FROM generate_series(0, 0); From a92eb1a4c38be7868fd0cb80261ae882c8fbb70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 11:20:05 +0000 Subject: [PATCH 53/64] Avoid creating unnecessary sources --- src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index 67c228d43f4..da8a6933d6f 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -582,6 +582,10 @@ Pipe ReadFromSystemNumbersStep::makePipe() const auto step_between_chunks = num_streams * block_range; for (size_t i = 0; i < num_streams; ++i) { + const auto source_start = numbers_storage.offset + i * block_range; + if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_start) + break; + auto source = std::make_shared( max_block_size, numbers_storage.offset + i * block_range, From 17352b6ed1cdcad359d2f17a96040f7fdaba9422 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 10 Jul 2024 12:06:11 +0000 Subject: [PATCH 54/64] Fix the case when offset is bigger than limit --- src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp | 8 +++++--- .../03203_system_numbers_limit_and_offset.reference | 1 + .../0_stateless/03203_system_numbers_limit_and_offset.sql | 8 +++++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp index da8a6933d6f..b070bbe739b 100644 --- a/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp +++ b/src/Processors/QueryPlan/ReadFromSystemNumbersStep.cpp @@ -582,13 +582,15 @@ Pipe ReadFromSystemNumbersStep::makePipe() const auto step_between_chunks = num_streams * block_range; for (size_t i = 0; i < num_streams; ++i) { - const auto source_start = numbers_storage.offset + i * block_range; - if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_start) + const auto source_offset = i * block_range; + if (numbers_storage.limit.has_value() && *numbers_storage.limit < source_offset) break; + const auto source_start = numbers_storage.offset + source_offset; + auto source = std::make_shared( max_block_size, - numbers_storage.offset + i * block_range, + source_start, end, numbers_storage.column_name, numbers_storage.step, diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference index e95864a6fc9..960459f42ac 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference @@ -17,3 +17,4 @@ case 2 90 100 110 +4250 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql index dc9e06b806f..e258b0bd2d2 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -42,4 +42,10 @@ SELECT number FROM numbers_mt(120) WHERE (number % 10) = 0 ORDER BY number ASC -SETTINGS max_block_size = 31, max_threads = 11 +SETTINGS max_block_size = 31, max_threads = 11; + +SELECT number +FROM numbers_mt(4242, 9) +WHERE (number % 10) = 0 +ORDER BY number ASC +SETTINGS max_block_size = 31, max_threads = 11; From 6be9d7b2544ccb5642999b53409c0d9326163a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 11 Jul 2024 13:40:44 +0200 Subject: [PATCH 55/64] Make the tests works with old analyzer --- .../0_stateless/03203_system_numbers_limit_and_offset.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql index e258b0bd2d2..b72ab4a98fd 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql @@ -21,10 +21,10 @@ SELECT ds FROM ( - WITH x -> arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(x)))) AS digits_sum + WITH arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum SELECT 1 + (number * 2) AS num, - digits_sum(num) AS ds + digits_sum AS ds FROM numbers_mt(10000) WHERE ds IN ( WITH x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0)) AS is_prime_slow From 35eabcc9d1cf3a5339c20ea34d006edfc858ae68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Sat, 13 Jul 2024 16:01:15 +0000 Subject: [PATCH 56/64] Split test cases --- ...ystem_numbers_limit_and_offset_complex.reference} | 6 ------ ...3203_system_numbers_limit_and_offset_complex.sql} | 12 ------------ ..._system_numbers_limit_and_offset_simple.reference | 6 ++++++ .../03203_system_numbers_limit_and_offset_simple.sql | 11 +++++++++++ 4 files changed, 17 insertions(+), 18 deletions(-) rename tests/queries/0_stateless/{03203_system_numbers_limit_and_offset.reference => 03203_system_numbers_limit_and_offset_complex.reference} (60%) rename tests/queries/0_stateless/{03203_system_numbers_limit_and_offset.sql => 03203_system_numbers_limit_and_offset_complex.sql} (82%) create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference create mode 100644 tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference similarity index 60% rename from tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference rename to tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference index 960459f42ac..d38b21d2d05 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.reference +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.reference @@ -1,9 +1,3 @@ -case 1 -9900 -9910 -9920 -case 2 -9990 18679 31 0 10 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql similarity index 82% rename from tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql rename to tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql index b72ab4a98fd..8e8620361fa 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql @@ -1,15 +1,3 @@ -SELECT 'case 1'; -SELECT number FROM numbers_mt(10000) -WHERE (number % 10) = 0 -ORDER BY number ASC -LIMIT 990, 3; - -SELECT 'case 2'; -SELECT number FROM numbers_mt(10000) -WHERE (number % 10) = 0 -ORDER BY number ASC -LIMIT 999, 20 SETTINGS max_block_size = 31; - --- The following query was buggy before, so let's use it as a test case WITH toUInt64(-1) AS umax, diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference new file mode 100644 index 00000000000..6aad3ee0b4b --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.reference @@ -0,0 +1,6 @@ +case 1 +9900 +9910 +9920 +case 2 +9990 diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql new file mode 100644 index 00000000000..df01ddf4312 --- /dev/null +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_simple.sql @@ -0,0 +1,11 @@ +SELECT 'case 1'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 990, 3; + +SELECT 'case 2'; +SELECT number FROM numbers_mt(10000) +WHERE (number % 10) = 0 +ORDER BY number ASC +LIMIT 999, 20 SETTINGS max_block_size = 31; From 8e469af2a41b444bbeb58b63fa0ece7d883553c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Sat, 13 Jul 2024 20:36:01 +0000 Subject: [PATCH 57/64] Make query really work with old analyzer --- ...ystem_numbers_limit_and_offset_complex.sql | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql index 8e8620361fa..756e08da27d 100644 --- a/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql +++ b/tests/queries/0_stateless/03203_system_numbers_limit_and_offset_complex.sql @@ -1,27 +1,26 @@ --- The following query was buggy before, so let's use it as a test case WITH - toUInt64(-1) AS umax, - toUInt8(ceil(log10(umax))) AS max_digits, - 9 * max_digits AS max_digits_sum, - (x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0))) AS is_prime_slow + (num > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < num) AND ((num % y) = 0)), range(toUInt64(sqrt(num)) + 1))) = 0) AS is_prime_slow SELECT num, - ds + ds, FROM ( - WITH arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum + WITH + arraySum(arrayMap(y -> toUInt8(y), splitByString('', toString(num)))) AS digits_sum SELECT 1 + (number * 2) AS num, digits_sum AS ds FROM numbers_mt(10000) WHERE ds IN ( - WITH x -> ((x > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < x) AND ((x % y) = 0)), range(toUInt64(sqrt(x)) + 1))) = 0)) AS is_prime_slow + WITH + (number > 1) AND (arraySum(arrayMap(y -> ((y > 1) AND (y < number) AND ((number % y) = 0)), range(toUInt64(sqrt(number)) + 1))) = 0) AS is_prime_slow SELECT number - FROM numbers(max_digits_sum + 1) - WHERE is_prime_slow(number) + FROM numbers(180 + 1) + WHERE is_prime_slow ) ) -WHERE is_prime_slow(num) +WHERE is_prime_slow ORDER BY num ASC LIMIT 998, 1 SETTINGS max_block_size = 64, max_threads=16; From 10c553b06450dfb34d63b8b37b8fbdd19e98ad5f Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 15 Jul 2024 18:36:19 +0200 Subject: [PATCH 58/64] empty commit From bc5e2afc86317d832e6e25fb7d7719e44bdc1876 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 16 Jul 2024 00:52:17 +0200 Subject: [PATCH 59/64] Update StorageDictionary.cpp From 04735f42297c6526b837e537c74ccbf0d38c0581 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 16 Jul 2024 02:27:58 +0200 Subject: [PATCH 60/64] Fix clang tidy --- src/Interpreters/Context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index fc1e87e7b7e..2602afd8b78 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -51,7 +51,6 @@ #include #include #include -#include #include #include #include From d579e9f64f1142934eeb8628f9b2764b5a022ec9 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 16 Jul 2024 23:12:02 +0200 Subject: [PATCH 61/64] Fix logical error in PrometheusRequestHandler. --- src/Server/PrometheusRequestHandler.cpp | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/Server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp index 1f3e038a1f5..87c106c3fc0 100644 --- a/src/Server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -30,15 +30,8 @@ void PrometheusRequestHandler::handleRequest(HTTPServerRequest & request, HTTPSe response.setContentType("text/plain; version=0.0.4; charset=UTF-8"); WriteBufferFromHTTPServerResponse wb(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout, write_event); - try - { - metrics_writer->write(wb); - wb.finalize(); - } - catch (...) - { - wb.finalize(); - } + metrics_writer->write(wb); + wb.finalize(); } catch (...) { From a6ee9a98f54396394460a6e8492c371b53e47fbd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 01:59:22 +0200 Subject: [PATCH 62/64] Fix clang-tidy --- src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp index dc1749b3196..a7f56999d73 100644 --- a/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp +++ b/src/Storages/transformQueryForExternalDatabaseAnalyzer.cpp @@ -69,9 +69,6 @@ ASTPtr getASTForExternalDatabaseFromQueryTree(const QueryTreeNodePtr & query_tre bool allow_where = true; if (const auto * join_node = join_tree->as()) { - if (join_node->getStrictness() != JoinStrictness::All) - allow_where = false; - if (join_node->getKind() == JoinKind::Left) allow_where = join_node->getLeftTableExpression()->isEqual(*table_expression); else if (join_node->getKind() == JoinKind::Right) From 694c0074ab3ccafae265485062b2075b0b98d0b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 02:08:37 +0200 Subject: [PATCH 63/64] Allow to run clang-tidy with clang-19 --- .clang-tidy | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.clang-tidy b/.clang-tidy index de19059d09e..bb63bf2eea6 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -27,6 +27,8 @@ Checks: [ '-bugprone-not-null-terminated-result', '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged '-bugprone-unchecked-optional-access', + '-bugprone-crtp-constructor-accessibility', + '-bugprone-suspicious-stringview-data-usage', '-cert-dcl16-c', '-cert-dcl37-c', @@ -36,6 +38,7 @@ Checks: [ '-cert-msc51-cpp', '-cert-oop54-cpp', '-cert-oop57-cpp', + '-cert-err33-c', # Misreports on clang-19: it warns about all functions containing 'remove' in the name, not only about the standard library. '-clang-analyzer-optin.performance.Padding', @@ -99,6 +102,7 @@ Checks: [ '-modernize-use-emplace', '-modernize-use-nodiscard', '-modernize-use-trailing-return-type', + '-modernize-use-designated-initializers', '-performance-enum-size', '-performance-inefficient-string-concatenation', From 968d28f1f4bae02eae2f7b96c774ddfd9656c308 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 17 Jul 2024 07:24:46 +0200 Subject: [PATCH 64/64] Remove outdated articles from the docs --- docs/en/sql-reference/data-types/lowcardinality.md | 3 +-- docs/zh/changelog/index.md | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 133ac2bd72e..a92f6f4dce6 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -56,7 +56,6 @@ Functions: ## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://altinity.com/blog/2020-5-20-reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer) -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) - Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index c91d8bcf4d1..cd77a8c03cf 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -356,7 +356,7 @@ sidebar_label: "\u53D8\u66F4\u65E5\u5FD7" #### 新功能 {#new-feature-1} -- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 这个新功能是由Altinity的特殊要求添加到错误修正版本中的。 +- 添加 `deduplicate_blocks_in_dependent_materialized_views` 用于控制具有实例化视图的表中幂等插入的行为的选项。 [#9070](https://github.com/ClickHouse/ClickHouse/pull/9070) [(urykhy)](https://github.com/urykhy) ### ClickHouse版本v20.1.2.4,2020-01-22 {#clickhouse-release-v20-1-2-4-2020-01-22}