From 587cde853ecd45196394915705aa6872af1e3dd7 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 10 Sep 2020 22:55:36 +0300 Subject: [PATCH 01/92] Avoid skipping unused shards twice (for query processing stage and read itself) --- src/DataStreams/RemoteQueryExecutor.cpp | 6 ++- .../ClusterProxy/executeQuery.cpp | 10 ++-- src/Interpreters/ClusterProxy/executeQuery.h | 4 +- src/Interpreters/InterpreterSelectQuery.cpp | 5 +- src/Storages/IStorage.h | 9 ++-- src/Storages/LiveView/StorageBlocks.h | 2 +- src/Storages/SelectQueryInfo.h | 6 +++ src/Storages/StorageBuffer.cpp | 4 +- src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageDistributed.cpp | 52 ++++++++----------- src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageMaterializedView.cpp | 4 +- src/Storages/StorageMaterializedView.h | 2 +- src/Storages/StorageMerge.cpp | 6 +-- src/Storages/StorageMerge.h | 2 +- src/Storages/tests/gtest_storage_log.cpp | 5 +- 16 files changed, 63 insertions(+), 58 deletions(-) diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 1b1bf2af8ef..9d036f96c42 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -314,6 +315,8 @@ void RemoteQueryExecutor::sendScalars() void RemoteQueryExecutor::sendExternalTables() { + SelectQueryInfo query_info; + size_t count = multiplexed_connections->size(); { @@ -328,7 +331,8 @@ void RemoteQueryExecutor::sendExternalTables() { StoragePtr cur = table.second; auto metadata_snapshot = cur->getInMemoryMetadataPtr(); - QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(context); + QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage( + context, QueryProcessingStage::Complete, query_info); Pipe pipe = cur->read( metadata_snapshot->getColumns().getNamesOfPhysical(), diff --git 
a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 1ebd3009ff7..74b9cd64ffc 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -4,9 +4,10 @@ #include #include #include -#include #include +#include #include +#include namespace DB @@ -75,12 +76,13 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } Pipe executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, - const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info) + IStreamFactory & stream_factory, Poco::Logger * log, + const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info) { assert(log); Pipes res; + const Settings & settings = context.getSettingsRef(); const std::string query = queryToString(query_ast); @@ -103,7 +105,7 @@ Pipe executeQuery( else throttler = user_level_throttler; - for (const auto & shard_info : cluster->getShardsInfo()) + for (const auto & shard_info : query_info.cluster->getShardsInfo()) stream_factory.createForShard(shard_info, query, query_ast, new_context, throttler, query_info, res); return Pipe::unitePipes(std::move(res)); diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index f0d9539770d..389b0bd7115 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB { @@ -26,8 +25,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin /// `stream_factory` object encapsulates the logic of creating streams for a different type of query /// (currently SELECT, DESCRIBE). 
Pipe executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, - const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info); + IStreamFactory & stream_factory, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const SelectQueryInfo & query_info); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 556070d0360..119dda6a3d4 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -489,8 +489,10 @@ BlockIO InterpreterSelectQuery::execute() Block InterpreterSelectQuery::getSampleBlockImpl() { + query_info.query = query_ptr; + if (storage && !options.only_analyze) - from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_ptr); + from_stage = storage->getQueryProcessingStage(*context, options.to_stage, query_info); /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. 
bool first_stage = from_stage < QueryProcessingStage::WithMergeableState @@ -1415,7 +1417,6 @@ void InterpreterSelectQuery::executeFetchColumns( if (max_streams > 1 && !is_remote) max_streams *= settings.max_streams_to_max_threads_ratio; - query_info.query = query_ptr; query_info.syntax_analyzer_result = syntax_analyzer_result; query_info.sets = query_analyzer->getPreparedSets(); query_info.prewhere_info = prewhere_info; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4a2e70aa84b..e03f6f0bf17 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -55,6 +54,7 @@ using StoragePolicyPtr = std::shared_ptr; struct StreamLocalLimits; class EnabledQuota; +struct SelectQueryInfo; struct ColumnSize { @@ -212,15 +212,12 @@ public: * * SelectQueryInfo is required since the stage can depends on the query * (see Distributed() engine and optimize_skip_unused_shards). + * And to store optimized cluster (after optimize_skip_unused_shards). * * QueryProcessingStage::Enum required for Distributed over Distributed, * since it cannot return Complete for intermediate queries never. 
*/ - QueryProcessingStage::Enum getQueryProcessingStage(const Context & context) const - { - return getQueryProcessingStage(context, QueryProcessingStage::Complete, {}); - } - virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const + virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const { return QueryProcessingStage::FetchColumns; } diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 37861b55568..6ca86132b28 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -33,7 +33,7 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override { return to_stage; } + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override { return to_stage; } Pipe read( const Names & /*column_names*/, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 9a5d0cc6338..f5069169b55 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -63,6 +63,8 @@ using TreeRewriterResultPtr = std::shared_ptr; class ReadInOrderOptimizer; using ReadInOrderOptimizerPtr = std::shared_ptr; +class Cluster; +using ClusterPtr = std::shared_ptr; /** Query along with some additional data, * that can be used during query processing @@ -73,6 +75,10 @@ struct SelectQueryInfo ASTPtr query; ASTPtr view_query; /// Optimized VIEW query + /// For optimize_skip_unused_shards + /// We can modify it in getQueryProcessingStage() + mutable ClusterPtr cluster; + TreeRewriterResultPtr syntax_analyzer_result; PrewhereInfoPtr 
prewhere_info; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 4b038692b77..91d5fbac3c0 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -130,7 +130,7 @@ private: }; -QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const { if (destination_id) { @@ -139,7 +139,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); - return destination->getQueryProcessingStage(context, to_stage, query_ptr); + return destination->getQueryProcessingStage(context, to_stage, query_info); } return QueryProcessingStage::FetchColumns; diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index b18b574ec6c..6f81dc60346 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -54,7 +54,7 @@ public: std::string getName() const override { return "Buffer"; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; Pipe read( const Names & column_names, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b06434b6317..c961ea14f41 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -446,11 +446,31 @@ StoragePtr StorageDistributed::createWithOwnCluster( return res; } -QueryProcessingStage::Enum 
StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const { const auto & settings = context.getSettingsRef(); auto metadata_snapshot = getInMemoryMetadataPtr(); + ClusterPtr cluster = getCluster(); + query_info.cluster = cluster; + + /// Always calculate optimized cluster here, to avoid conditions during read() + /// (Anyway it will be calculated in the read()) + if (settings.optimize_skip_unused_shards) + { + ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_info.query); + if (optimized_cluster) + { + LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", makeFormattedListOfShards(optimized_cluster)); + cluster = optimized_cluster; + query_info.cluster = cluster; + } + else + { + LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", has_sharding_key ? "" : " (no sharding key)"); + } + } + if (settings.distributed_group_by_no_merge) { if (settings.distributed_group_by_no_merge == DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION) @@ -464,14 +484,6 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con if (to_stage == QueryProcessingStage::WithMergeableState) return QueryProcessingStage::WithMergeableState; - ClusterPtr cluster = getCluster(); - if (settings.optimize_skip_unused_shards) - { - ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_ptr); - if (optimized_cluster) - cluster = optimized_cluster; - } - /// If there is only one node, the query can be fully processed by the /// shard, initiator will work as a proxy only. 
if (getClusterQueriedNodes(settings, cluster) == 1) @@ -483,7 +495,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con sharding_key_is_deterministic) { Block sharding_key_block = sharding_key_expr->getSampleBlock(); - auto stage = getOptimizedQueryProcessingStage(query_ptr, settings.extremes, sharding_key_block); + auto stage = getOptimizedQueryProcessingStage(query_info.query, settings.extremes, sharding_key_block); if (stage) { LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage)); @@ -503,23 +515,6 @@ Pipe StorageDistributed::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - const auto & settings = context.getSettingsRef(); - - ClusterPtr cluster = getCluster(); - if (settings.optimize_skip_unused_shards) - { - ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_info.query); - if (optimized_cluster) - { - LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", makeFormattedListOfShards(optimized_cluster)); - cluster = optimized_cluster; - } - else - { - LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", has_sharding_key ? 
"" : " (no sharding key)"); - } - } - const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table, remote_table_function_ptr); @@ -538,8 +533,7 @@ Pipe StorageDistributed::read( : ClusterProxy::SelectStreamFactory( header, processed_stage, StorageID{remote_database, remote_table}, scalars, has_virtual_shard_num_column, context.getExternalTables()); - return ClusterProxy::executeQuery(select_stream_factory, cluster, log, - modified_query_ast, context, context.getSettingsRef(), query_info); + return ClusterProxy::executeQuery(select_stream_factory, log, modified_query_ast, context, query_info); } diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 7e4e9f56ab4..382bca31e6a 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -66,7 +66,7 @@ public: bool isRemote() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; Pipe read( const Names & column_names, diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index a2e3fae0951..47de24768ed 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -101,9 +101,9 @@ StorageMaterializedView::StorageMaterializedView( DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } -QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const { - return 
getTargetTable()->getQueryProcessingStage(context, to_stage, query_ptr); + return getTargetTable()->getQueryProcessingStage(context, to_stage, query_info); } Pipe StorageMaterializedView::read( diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 1ee4246c7f1..ac2c2604ae1 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -64,7 +64,7 @@ public: void checkTableCanBeDropped() const override; void checkPartitionCanBeDropped(const ASTPtr & partition) override; - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; StoragePtr getTargetTable() const; StoragePtr tryGetTargetTable() const; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index bade0810320..1fde265e447 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -149,7 +149,7 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons } -QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const +QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const { auto stage_in_source_tables = QueryProcessingStage::FetchColumns; @@ -163,7 +163,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & if (table && table.get() != this) { ++selected_table_size; - stage_in_source_tables = std::max(stage_in_source_tables, table->getQueryProcessingStage(context, to_stage, query_ptr)); + stage_in_source_tables = std::max(stage_in_source_tables, table->getQueryProcessingStage(context, 
to_stage, query_info)); } iterator->next(); @@ -308,7 +308,7 @@ Pipe StorageMerge::createSources( return pipe; } - auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, modified_query_info.query); + auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, modified_query_info); if (processed_stage <= storage_stage) { /// If there are only virtual columns in query, you must request at least one other column. diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 5e2d56d18c0..5929e418755 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -27,7 +27,7 @@ public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const ASTPtr &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; Pipe read( const Names & column_names, diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 8de14b53471..1eab072fb4b 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,9 @@ std::string readData(DB::StoragePtr & table, const DB::Context & context) Names column_names; column_names.push_back("a"); - QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); + SelectQueryInfo query_info; + QueryProcessingStage::Enum stage = table->getQueryProcessingStage( + context, QueryProcessingStage::Complete, query_info); QueryPipeline pipeline; pipeline.init(table->read(column_names, metadata_snapshot, {}, context, stage, 8192, 1)); From b838214a35a84117333e8e5a2764e28e4ec5b952 Mon 
Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 20 Sep 2020 20:52:17 +0300 Subject: [PATCH 02/92] Pass non-const SelectQueryInfo (and drop mutable qualifiers) --- src/DataStreams/RemoteQueryExecutor.cpp | 2 +- src/Processors/QueryPlan/ReadFromStorageStep.cpp | 2 +- src/Processors/QueryPlan/ReadFromStorageStep.h | 4 ++-- src/Storages/IStorage.cpp | 4 ++-- src/Storages/IStorage.h | 6 +++--- src/Storages/Kafka/StorageKafka.cpp | 2 +- src/Storages/Kafka/StorageKafka.h | 2 +- src/Storages/LiveView/StorageBlocks.h | 4 ++-- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/LiveView/StorageLiveView.h | 2 +- src/Storages/MergeTree/StorageFromMergeTreeDataPart.h | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 2 +- src/Storages/RabbitMQ/StorageRabbitMQ.h | 2 +- src/Storages/SelectQueryInfo.h | 10 +++++----- src/Storages/StorageBuffer.cpp | 4 ++-- src/Storages/StorageBuffer.h | 4 ++-- src/Storages/StorageDictionary.cpp | 2 +- src/Storages/StorageDictionary.h | 2 +- src/Storages/StorageDistributed.cpp | 4 ++-- src/Storages/StorageDistributed.h | 4 ++-- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageGenerateRandom.cpp | 2 +- src/Storages/StorageGenerateRandom.h | 2 +- src/Storages/StorageHDFS.cpp | 2 +- src/Storages/StorageHDFS.h | 2 +- src/Storages/StorageInput.cpp | 2 +- src/Storages/StorageInput.h | 2 +- src/Storages/StorageJoin.cpp | 2 +- src/Storages/StorageJoin.h | 2 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageLog.h | 2 +- src/Storages/StorageMaterializeMySQL.cpp | 2 +- src/Storages/StorageMaterializeMySQL.h | 2 +- src/Storages/StorageMaterializedView.cpp | 4 ++-- src/Storages/StorageMaterializedView.h | 4 ++-- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMemory.h | 2 +- src/Storages/StorageMerge.cpp | 8 ++++---- src/Storages/StorageMerge.h | 8 ++++---- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageMergeTree.h | 2 +- src/Storages/StorageMongoDB.cpp | 2 +- 
src/Storages/StorageMongoDB.h | 2 +- src/Storages/StorageMySQL.cpp | 2 +- src/Storages/StorageMySQL.h | 2 +- src/Storages/StorageNull.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/StorageS3.cpp | 2 +- src/Storages/StorageS3.h | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- src/Storages/StorageStripeLog.h | 2 +- src/Storages/StorageTinyLog.cpp | 2 +- src/Storages/StorageTinyLog.h | 2 +- src/Storages/StorageURL.cpp | 2 +- src/Storages/StorageURL.h | 2 +- src/Storages/StorageValues.cpp | 2 +- src/Storages/StorageValues.h | 2 +- src/Storages/StorageView.cpp | 2 +- src/Storages/StorageView.h | 2 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/StorageXDBC.h | 2 +- src/Storages/System/IStorageSystemOneBlock.h | 2 +- src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemColumns.h | 2 +- src/Storages/System/StorageSystemDetachedParts.cpp | 2 +- src/Storages/System/StorageSystemDetachedParts.h | 2 +- src/Storages/System/StorageSystemDisks.cpp | 2 +- src/Storages/System/StorageSystemDisks.h | 2 +- src/Storages/System/StorageSystemNumbers.cpp | 2 +- src/Storages/System/StorageSystemNumbers.h | 2 +- src/Storages/System/StorageSystemOne.cpp | 2 +- src/Storages/System/StorageSystemOne.h | 2 +- src/Storages/System/StorageSystemPartsBase.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.h | 2 +- src/Storages/System/StorageSystemReplicas.cpp | 2 +- src/Storages/System/StorageSystemReplicas.h | 2 +- src/Storages/System/StorageSystemStoragePolicies.cpp | 2 +- src/Storages/System/StorageSystemStoragePolicies.h | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/System/StorageSystemTables.h | 2 +- src/Storages/System/StorageSystemZeros.cpp | 2 +- src/Storages/System/StorageSystemZeros.h | 2 +- src/Storages/tests/gtest_storage_log.cpp | 2 +- 85 files changed, 106 insertions(+), 106 deletions(-) diff --git a/src/DataStreams/RemoteQueryExecutor.cpp 
b/src/DataStreams/RemoteQueryExecutor.cpp index 9d036f96c42..38486aa6368 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -336,7 +336,7 @@ void RemoteQueryExecutor::sendExternalTables() Pipe pipe = cur->read( metadata_snapshot->getColumns().getNamesOfPhysical(), - metadata_snapshot, {}, context, + metadata_snapshot, query_info, context, read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); auto data = std::make_unique(); diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.cpp b/src/Processors/QueryPlan/ReadFromStorageStep.cpp index b085c177ad4..2a656ddac00 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.cpp +++ b/src/Processors/QueryPlan/ReadFromStorageStep.cpp @@ -19,7 +19,7 @@ ReadFromStorageStep::ReadFromStorageStep( std::shared_ptr quota_, StoragePtr storage_, const Names & required_columns_, - const SelectQueryInfo & query_info_, + SelectQueryInfo & query_info_, std::shared_ptr context_, QueryProcessingStage::Enum processing_stage_, size_t max_block_size_, diff --git a/src/Processors/QueryPlan/ReadFromStorageStep.h b/src/Processors/QueryPlan/ReadFromStorageStep.h index 98cde63a863..db62ff54a08 100644 --- a/src/Processors/QueryPlan/ReadFromStorageStep.h +++ b/src/Processors/QueryPlan/ReadFromStorageStep.h @@ -30,7 +30,7 @@ public: std::shared_ptr quota, StoragePtr storage, const Names & required_columns, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processing_stage, size_t max_block_size, @@ -53,7 +53,7 @@ private: StoragePtr storage; const Names & required_columns; - const SelectQueryInfo & query_info; + SelectQueryInfo & query_info; std::shared_ptr context; QueryProcessingStage::Enum processing_stage; size_t max_block_size; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 50b36ced19c..d2305f6416e 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -83,7 +83,7 @@ 
TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, cons Pipe IStorage::read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, @@ -100,7 +100,7 @@ void IStorage::read( SizeLimits & leaf_limits, std::shared_ptr quota, const Names & column_names, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e03f6f0bf17..5227c12ac85 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -217,7 +217,7 @@ public: * QueryProcessingStage::Enum required for Distributed over Distributed, * since it cannot return Complete for intermediate queries never. */ - virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const + virtual QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const { return QueryProcessingStage::FetchColumns; } @@ -275,7 +275,7 @@ public: virtual Pipe read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, @@ -291,7 +291,7 @@ public: SizeLimits & leaf_limits, std::shared_ptr quota, const Names & column_names, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, std::shared_ptr context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 
9ba5ad7a65b..392d8cc8886 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -206,7 +206,7 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_) Pipe StorageKafka::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /* query_info */, + SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 272e419bebe..44d8baca299 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -41,7 +41,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 6ca86132b28..4ad0ffb93ca 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -33,12 +33,12 @@ public: bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override { return to_stage; } + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override { return to_stage; } Pipe read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/LiveView/StorageLiveView.cpp 
b/src/Storages/LiveView/StorageLiveView.cpp index 7095357a161..4e67b15e6d7 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -424,7 +424,7 @@ void StorageLiveView::refresh() Pipe StorageLiveView::read( const Names & /*column_names*/, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index fe4be6ee08e..f6ec129fe76 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -129,7 +129,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index c13f540ad34..aa24ddcf33c 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -21,7 +21,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 78aad9e6a2d..bdf110fc7c3 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -479,7 +479,7 @@ void StorageRabbitMQ::unbindExchange() Pipe StorageRabbitMQ::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const 
SelectQueryInfo & /* query_info */, + SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, size_t /* max_block_size */, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.h b/src/Storages/RabbitMQ/StorageRabbitMQ.h index 8d9a20f9e34..64243035fd3 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.h +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.h @@ -38,7 +38,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index f5069169b55..029f033dbd7 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -75,17 +75,17 @@ struct SelectQueryInfo ASTPtr query; ASTPtr view_query; /// Optimized VIEW query - /// For optimize_skip_unused_shards - /// We can modify it in getQueryProcessingStage() - mutable ClusterPtr cluster; + /// For optimize_skip_unused_shards. + /// Can be modified in getQueryProcessingStage() + ClusterPtr cluster; TreeRewriterResultPtr syntax_analyzer_result; PrewhereInfoPtr prewhere_info; ReadInOrderOptimizerPtr order_optimizer; - /// We can modify it while reading from storage - mutable InputOrderInfoPtr input_order_info; + /// Can be modified while reading from storage + InputOrderInfoPtr input_order_info; /// Prepared sets are used for indices by storage engine. 
/// Example: x IN (1, 2, 3) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 91d5fbac3c0..846f47b11c6 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -130,7 +130,7 @@ private: }; -QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const +QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { if (destination_id) { @@ -149,7 +149,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context Pipe StorageBuffer::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 6f81dc60346..455560e22da 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -54,12 +54,12 @@ public: std::string getName() const override { return "Buffer"; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index e859baa702e..2d84dde14ba 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ 
-132,7 +132,7 @@ void StorageDictionary::checkTableCanBeDropped() const Pipe StorageDictionary::read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 5c7beb88d88..576cc2de064 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -19,7 +19,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c961ea14f41..9ce3672f9a7 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -446,7 +446,7 @@ StoragePtr StorageDistributed::createWithOwnCluster( return res; } -QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const +QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { const auto & settings = context.getSettingsRef(); auto metadata_snapshot = getInMemoryMetadataPtr(); @@ -509,7 +509,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con Pipe StorageDistributed::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, const size_t /*max_block_size*/, diff --git 
a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 382bca31e6a..a9d9a868166 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -66,12 +66,12 @@ public: bool isRemote() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8f7e1563a62..10a38ed1068 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -381,7 +381,7 @@ private: Pipe StorageFile::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ea70dcd5311..2e5ad988eab 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -27,7 +27,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 62d8259f705..f1e822be2d1 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -441,7 +441,7 @@ void 
registerStorageGenerateRandom(StorageFactory & factory) Pipe StorageGenerateRandom::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index e0f037f9a08..965c5b3a9d3 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -18,7 +18,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 65b1eed7793..7df7266445b 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -265,7 +265,7 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c Pipe StorageHDFS::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageHDFS.h b/src/Storages/StorageHDFS.h index fdeaf4ae1b3..4172bce1cd1 100644 --- a/src/Storages/StorageHDFS.h +++ b/src/Storages/StorageHDFS.h @@ -22,7 +22,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 5e525210548..1f881bccf07 100644 --- a/src/Storages/StorageInput.cpp +++ 
b/src/Storages/StorageInput.cpp @@ -61,7 +61,7 @@ void StorageInput::setInputStream(BlockInputStreamPtr input_stream_) Pipe StorageInput::read( const Names & /*column_names*/, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index c19b19e4703..3cb64993d45 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -20,7 +20,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 33c67229cfa..d82e8ba959d 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -448,7 +448,7 @@ private: Pipe StorageJoin::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 95037c4d33a..d66820909aa 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -40,7 +40,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 944dc0e5804..86cc6afe33f 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -613,7 +613,7 @@ const 
StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMeta Pipe StorageLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 49fc9a576c5..f28a3d67edb 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -27,7 +27,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMaterializeMySQL.cpp b/src/Storages/StorageMaterializeMySQL.cpp index 7d908736bdc..32f110b8bef 100644 --- a/src/Storages/StorageMaterializeMySQL.cpp +++ b/src/Storages/StorageMaterializeMySQL.cpp @@ -34,7 +34,7 @@ StorageMaterializeMySQL::StorageMaterializeMySQL(const StoragePtr & nested_stora Pipe StorageMaterializeMySQL::read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMaterializeMySQL.h b/src/Storages/StorageMaterializeMySQL.h index 4278ce64bd7..d97347ae789 100644 --- a/src/Storages/StorageMaterializeMySQL.h +++ b/src/Storages/StorageMaterializeMySQL.h @@ -22,7 +22,7 @@ public: StorageMaterializeMySQL(const StoragePtr & nested_storage_, const DatabaseMaterializeMySQL * database_); Pipe read( - const Names & column_names, const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, + const Names & column_names, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info, const Context & context, 
QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 47de24768ed..db9c290cb66 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -101,7 +101,7 @@ StorageMaterializedView::StorageMaterializedView( DatabaseCatalog::instance().addDependency(select.select_table_id, getStorageID()); } -QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const +QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { return getTargetTable()->getQueryProcessingStage(context, to_stage, query_info); } @@ -109,7 +109,7 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(cons Pipe StorageMaterializedView::read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index ac2c2604ae1..d4c218f67ba 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -64,7 +64,7 @@ public: void checkTableCanBeDropped() const override; void checkPartitionCanBeDropped(const ASTPtr & partition) override; - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) 
const override; StoragePtr getTargetTable() const; StoragePtr tryGetTargetTable() const; @@ -74,7 +74,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 25e232dc4ad..78bf06e5350 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -136,7 +136,7 @@ StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription col Pipe StorageMemory::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index e67e3015028..40d31ae2dd3 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -31,7 +31,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 1fde265e447..f4a057023f6 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -149,7 +149,7 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons } -QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const SelectQueryInfo & query_info) const +QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & 
query_info) const { auto stage_in_source_tables = QueryProcessingStage::FetchColumns; @@ -176,7 +176,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & Pipe StorageMerge::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, const size_t max_block_size, @@ -274,7 +274,7 @@ Pipe StorageMerge::read( Pipe StorageMerge::createSources( const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, const UInt64 max_block_size, const Block & header, @@ -459,7 +459,7 @@ void StorageMerge::alter( Block StorageMerge::getQueryHeader( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage) { diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 5929e418755..5b4ea89e475 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -27,12 +27,12 @@ public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } - QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, const SelectQueryInfo &) const override; + QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum /*to_stage*/, SelectQueryInfo &) const override; Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, @@ -78,13 +78,13 @@ protected: Block getQueryHeader( const Names & column_names, 
const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage); Pipe createSources( const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, const UInt64 max_block_size, const Block & header, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 55fb42b550e..698be543743 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -174,7 +174,7 @@ StorageMergeTree::~StorageMergeTree() Pipe StorageMergeTree::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 5662f9e0088..318db847f0c 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -40,7 +40,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 210dc09ba86..b9ac2443472 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -56,7 +56,7 @@ StorageMongoDB::StorageMongoDB( Pipe StorageMongoDB::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageMongoDB.h 
b/src/Storages/StorageMongoDB.h index a8bd2f4d160..cf2c5d8c278 100644 --- a/src/Storages/StorageMongoDB.h +++ b/src/Storages/StorageMongoDB.h @@ -37,7 +37,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index afbca0d9430..e5c59a794e1 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -67,7 +67,7 @@ StorageMySQL::StorageMySQL( Pipe StorageMySQL::read( const Names & column_names_, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info_, + SelectQueryInfo & query_info_, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size_, diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index a7f98c4379b..acab8f9290e 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -40,7 +40,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index b5387c6b924..cc1d53a42e5 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -25,7 +25,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processing_stage*/, size_t, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9613bd5111d..f40eb378d05 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ 
b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3517,7 +3517,7 @@ ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock StorageReplicatedMerg Pipe StorageReplicatedMergeTree::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index d851082d5c2..6635cb1b603 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -90,7 +90,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ceca1405857..3c5310c1064 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -287,7 +287,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & Pipe StorageS3::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 5a702aa8785..1ecc9409671 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -44,7 +44,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageStripeLog.cpp 
b/src/Storages/StorageStripeLog.cpp index c4344cf6f1f..5b133ef5e90 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -300,7 +300,7 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora Pipe StorageStripeLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index f88120a932e..fb70da24ac8 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -28,7 +28,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index a59480f0a0d..81eec735c8a 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -427,7 +427,7 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage Pipe StorageTinyLog::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index dc6ff101503..0c37a16d179 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -27,7 +27,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, 
QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index a6c8f1b39f8..139d9d08e44 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -180,7 +180,7 @@ std::function IStorageURLBase::getReadPOSTDataCallback( Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 7983ad71520..e4ce87af550 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -22,7 +22,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 387d2065f92..84845170d36 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -24,7 +24,7 @@ StorageValues::StorageValues( Pipe StorageValues::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index 8a1a06eeb54..5729f245149 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -18,7 +18,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t 
max_block_size, diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 4b7733c1cd2..f710a1289aa 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -50,7 +50,7 @@ StorageView::StorageView( Pipe StorageView::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 682c7424b98..56c9b0b4b1f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -24,7 +24,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 3350a4352db..0b4251bc912 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -87,7 +87,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( Pipe StorageXDBC::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 508edf22684..d2bd29d7c3d 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -18,7 +18,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/IStorageSystemOneBlock.h 
b/src/Storages/System/IStorageSystemOneBlock.h index 81650d669dc..d83a71c2592 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -35,7 +35,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 8a7368d5ace..d55d2250081 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -243,7 +243,7 @@ private: Pipe StorageSystemColumns::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index d90cec763c9..c4f35485612 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -20,7 +20,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 5a233ed7a33..d926a7c535e 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -33,7 +33,7 @@ StorageSystemDetachedParts::StorageSystemDetachedParts(const StorageID & table_i Pipe StorageSystemDetachedParts::read( const Names & /* column_names */, const 
StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index c0f1db51642..4c6970dadd6 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -25,7 +25,7 @@ protected: Pipe read( const Names & /* column_names */, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index ed831927f16..bdd8e2b1861 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -30,7 +30,7 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) Pipe StorageSystemDisks::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index d2075c3c784..cff05242019 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -23,7 +23,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp 
index af8073e06dc..677e0c02400 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -125,7 +125,7 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult Pipe StorageSystemNumbers::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index f907f3d5f93..d12c28c1ce2 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -32,7 +32,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 57b87e165a9..c456b22e97b 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -23,7 +23,7 @@ StorageSystemOne::StorageSystemOne(const StorageID & table_id_) Pipe StorageSystemOne::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index 3469d6ccb29..8228ce465e0 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -24,7 +24,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const 
Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index f590244116d..f0fa1251674 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -226,7 +226,7 @@ StoragesInfo StoragesInfoStream::next() Pipe StorageSystemPartsBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 2cb19f8f17d..eec6d5ab331 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -58,7 +58,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index f28ecdb7f65..c3ce43f0a79 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -60,7 +60,7 @@ StorageSystemReplicas::StorageSystemReplicas(const StorageID & table_id_) Pipe StorageSystemReplicas::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index c198cc29ddc..d9e364a28c0 100644 --- 
a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -21,7 +21,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index c8d266644eb..67a876e69bb 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -38,7 +38,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const StorageID & tab Pipe StorageSystemStoragePolicies::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & /*query_info*/, + SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index 15e5e497785..afd5e672d66 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -23,7 +23,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 0ad961ad7d8..cc3c8d4386e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -488,7 +488,7 @@ private: Pipe StorageSystemTables::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & 
query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 259eb096ea7..2e0b3386f8c 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -21,7 +21,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 270dcc81cc1..ed5ab93369a 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -93,7 +93,7 @@ StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multith Pipe StorageSystemZeros::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo &, + SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index 41de3ce6246..04733f550c1 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -23,7 +23,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 1eab072fb4b..d00e1aedb80 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -120,7 +120,7 @@ 
std::string readData(DB::StoragePtr & table, const DB::Context & context) context, QueryProcessingStage::Complete, query_info); QueryPipeline pipeline; - pipeline.init(table->read(column_names, metadata_snapshot, {}, context, stage, 8192, 1)); + pipeline.init(table->read(column_names, metadata_snapshot, query_info, context, stage, 8192, 1)); BlockInputStreamPtr in = std::make_shared(std::move(pipeline)); Block sample; From 5254a5ded7bdc38446eb356570b097e771d67d89 Mon Sep 17 00:00:00 2001 From: feng lv Date: Sun, 18 Oct 2020 20:18:31 +0800 Subject: [PATCH 03/92] add setting aggregate_functions_null_for_empty --- src/Core/Settings.h | 3 +- src/Interpreters/TreeRewriter.cpp | 27 ++++++++++++++++++ ...gregate_functions_null_for_empty.reference | 8 ++++++ ...ing_aggregate_functions_null_for_empty.sql | 28 +++++++++++++++++++ 4 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference create mode 100644 tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d73098ca6e0..ac743768ff8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -400,7 +400,8 @@ class IColumn; M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. 
Will be removed after 2021-02-12", 0) \ M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ - M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) + M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \ + M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below. diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 0b2f8ac3eb7..2726594f51c 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -31,6 +31,7 @@ #include #include +#include namespace DB { @@ -110,6 +111,25 @@ struct CustomizeFunctionsSuffixData char ifDistinct[] = "ifdistinct"; using CustomizeIfDistinctVisitor = InDepthNodeVisitor>, true>; +/// Used to rewrite all aggregate functions to add -OrNull suffix to them if setting `aggregate_functions_null_for_empty` is set. +struct CustomizeAggregateFunctionsSuffixData +{ + using TypeToVisit = ASTFunction; + + const String & customized_func_suffix; + + void visit(ASTFunction & func, ASTPtr &) + { + if (AggregateFunctionFactory::instance().isAggregateFunctionName(func.name) + && !endsWith(func.name, customized_func_suffix)) + { + func.name = func.name + customized_func_suffix; + } + } +}; + +using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor, true>; + /// Translate qualified names such as db.table.column, table.column, table_alias.column to names' normal form. /// Expand asterisks and qualified asterisks with column names. /// There would be columns in normal form & column aliases after translation. Column & column alias would be normalized in QueryNormalizer. 
@@ -692,6 +712,13 @@ void TreeRewriter::normalize(ASTPtr & query, Aliases & aliases, const Settings & CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); } + // Rewrite all aggregate functions to add -OrNull suffix to them + if (settings.aggregate_functions_null_for_empty) + { + CustomizeAggregateFunctionsOrNullVisitor::Data data_or_null{"OrNull"}; + CustomizeAggregateFunctionsOrNullVisitor(data_or_null).visit(query); + } + /// Creates a dictionary `aliases`: alias -> ASTPtr QueryAliasesVisitor(aliases).visit(query); diff --git a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference new file mode 100644 index 00000000000..570ea20ffad --- /dev/null +++ b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference @@ -0,0 +1,8 @@ +0 +\N +\N +\N +45 +45 +45 +45 diff --git a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql new file mode 100644 index 00000000000..c59b592e701 --- /dev/null +++ b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS defaults; + +CREATE TABLE defaults +( + n Int8 +)ENGINE = Memory(); + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; + +SET aggregate_functions_null_for_empty=1; + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; + +INSERT INTO defaults SELECT * FROM numbers(10); + +SET aggregate_functions_null_for_empty=0; + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; + +SET aggregate_functions_null_for_empty=1; + +SELECT sum(n) FROM defaults; +SELECT sumOrNull(n) FROM defaults; + +DROP TABLE defaults; From eb0e3a83d0b0ecb8f20fc9260ad4d8da02f22863 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Oct 2020 05:02:26 +0300 Subject: [PATCH 04/92] 
Support SNI in https connections to remote resources --- src/IO/HTTPCommon.cpp | 19 ++++++++++--------- src/IO/HTTPCommon.h | 7 ++++++- src/IO/ReadWriteBufferFromHTTP.cpp | 1 - 3 files changed, 16 insertions(+), 11 deletions(-) delete mode 100644 src/IO/ReadWriteBufferFromHTTP.cpp diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 6b7f30cd9b6..bda615edcd5 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -20,6 +20,7 @@ # include # include # include +# include #endif #include @@ -68,27 +69,27 @@ namespace throw Exception("Unsupported scheme in URI '" + uri.toString() + "'", ErrorCodes::UNSUPPORTED_URI_SCHEME); } - HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host=true) + HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host = true) { HTTPSessionPtr session; if (https) + { #if USE_SSL - session = std::make_shared(); + /// Cannot resolve host in advance, otherwise SNI won't work in Poco. + session = std::make_shared(host, port); #else throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); #endif + } else - session = std::make_shared(); + { + String resolved_host = resolve_host ? 
DNSResolver::instance().resolveHost(host).toString() : host; + session = std::make_shared(resolved_host, port); + } ProfileEvents::increment(ProfileEvents::CreatedHTTPConnections); - if (resolve_host) - session->setHost(DNSResolver::instance().resolveHost(host).toString()); - else - session->setHost(host); - session->setPort(port); - /// doesn't work properly without patch #if defined(POCO_CLICKHOUSE_PATCH) session->setKeepAlive(keep_alive); diff --git a/src/IO/HTTPCommon.h b/src/IO/HTTPCommon.h index 66764b1c805..db0abe8fc6e 100644 --- a/src/IO/HTTPCommon.h +++ b/src/IO/HTTPCommon.h @@ -13,6 +13,7 @@ #include + namespace Poco { namespace Net @@ -24,6 +25,7 @@ namespace Net namespace DB { + constexpr int HTTP_TOO_MANY_REQUESTS = 429; class SingleEndpointHTTPSessionPool : public PoolBase @@ -39,6 +41,7 @@ private: public: SingleEndpointHTTPSessionPool(const std::string & host_, UInt16 port_, bool https_, size_t max_pool_size_); }; + using PooledHTTPSessionPtr = SingleEndpointHTTPSessionPool::Entry; using HTTPSessionPtr = std::shared_ptr; @@ -59,5 +62,7 @@ bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status); */ std::istream * receiveResponse( Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects); -void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false); + +void assertResponseIsOk( + const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false); } diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp deleted file mode 100644 index 4d046bfe2c6..00000000000 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ /dev/null @@ -1 +0,0 @@ -#include From ec3f19b79e0b967436272fd2c753a9f5ecbe0cd3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Oct 2020 05:25:54 +0300 Subject: 
[PATCH 05/92] Add a test --- .../0_stateless/01532_client_sni.reference | 1 + tests/queries/0_stateless/01532_client_sni.sh | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/01532_client_sni.reference create mode 100755 tests/queries/0_stateless/01532_client_sni.sh diff --git a/tests/queries/0_stateless/01532_client_sni.reference b/tests/queries/0_stateless/01532_client_sni.reference new file mode 100644 index 00000000000..7fb5e6c6321 --- /dev/null +++ b/tests/queries/0_stateless/01532_client_sni.reference @@ -0,0 +1 @@ +\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75 diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh new file mode 100755 index 00000000000..9bfaf2d5427 --- /dev/null +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +# Check that ClickHouse properly use SNI extension in Client Hello packet in HTTPS connection. + +strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru/', RawBLOB, 'data String')" 2>&1 | + grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75' +# ^^^^^^^^ ^^^^^^^ ^^^^^^^ ^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +# | | | | | +# server name data | hostname | y a n d e x . 
r u +# extension id len: 14 | type | +# | | +# hostnames list hostname +# len, 14 len, 9 From 237384bad99d4c27fd604398860e15af03ea74ce Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 22 Oct 2020 05:26:48 +0300 Subject: [PATCH 06/92] Fix "Arcadia" --- src/IO/ya.make | 1 - 1 file changed, 1 deletion(-) diff --git a/src/IO/ya.make b/src/IO/ya.make index 28099818b46..ad6833bc2ef 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -44,7 +44,6 @@ SRCS( ReadBufferFromPocoSocket.cpp readFloatText.cpp ReadHelpers.cpp - ReadWriteBufferFromHTTP.cpp SeekAvoidingReadBuffer.cpp UseSSL.cpp WriteBufferAIO.cpp From 608c7383e92836bded9e97db5be98fc16bf27974 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Oct 2020 05:27:23 +0300 Subject: [PATCH 07/92] Update 01532_client_sni.sh --- tests/queries/0_stateless/01532_client_sni.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index 9bfaf2d5427..51693639a37 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -# Check that ClickHouse properly use SNI extension in Client Hello packet in HTTPS connection. +# Check that ClickHouse properly uses SNI extension in Client Hello packet in HTTPS connection. 
strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru/', RawBLOB, 'data String')" 2>&1 | grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75' From 86de5962d2d7ff5743bc938f21c3b365405aabb3 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 22 Oct 2020 05:27:47 +0300 Subject: [PATCH 08/92] Update 01532_client_sni.sh --- tests/queries/0_stateless/01532_client_sni.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index 51693639a37..9284661dbc0 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -13,4 +13,4 @@ strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * # extension id len: 14 | type | # | | # hostnames list hostname -# len, 14 len, 9 +# len, 12 len, 9 From c6d450f3b5c9dab5f83f4e4c39976d89106d01ea Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 09:43:29 +0300 Subject: [PATCH 09/92] Weird test --- tests/queries/0_stateless/01532_client_sni.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index 9bfaf2d5427..a3bc7be884e 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -5,7 +5,9 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Check that ClickHouse properly use SNI extension in Client Hello packet in HTTPS connection. 
-strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru/', RawBLOB, 'data String')" 2>&1 | +sudo bash -c 'echo "127.0.0.1 yandex.ru" >> /etc/hosts' + +echo -ne 'y\r\n' | strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru:8443/', RawBLOB, 'data String')" 2>&1 | grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75' # ^^^^^^^^ ^^^^^^^ ^^^^^^^ ^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # | | | | | @@ -14,3 +16,5 @@ strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * # | | # hostnames list hostname # len, 14 len, 9 + +sudo bash -c 'sed -i.bak "/yandex\.ru/d" /etc/hosts' From 7187490f98a37eeb9481f5d65de88d7f5897ed40 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 24 Oct 2020 09:45:51 +0300 Subject: [PATCH 10/92] Weird test --- tests/queries/0_stateless/01532_client_sni.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index f9a8a131780..8a62cccf381 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) sudo bash -c 'echo "127.0.0.1 yandex.ru" >> /etc/hosts' -echo -ne 'y\r\n' | strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru:8443/', RawBLOB, 'data String')" 2>&1 | +echo -ne 'y\r\n' | strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru:${CLICKHOUSE_PORT_HTTPS}/', RawBLOB, 'data String')" 2>&1 | grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75' # ^^^^^^^^ ^^^^^^^ ^^^^^^^ ^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # | | | | | From 81f492f8b82ef73acfc1637c2edffa6d28db7998 Mon Sep 17 00:00:00 2001 From: 
Alexey Milovidov Date: Sat, 24 Oct 2020 21:50:01 +0300 Subject: [PATCH 11/92] Improve test --- tests/queries/0_stateless/01532_client_sni.reference | 2 +- tests/queries/0_stateless/01532_client_sni.sh | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01532_client_sni.reference b/tests/queries/0_stateless/01532_client_sni.reference index 7fb5e6c6321..4bf62e12856 100644 --- a/tests/queries/0_stateless/01532_client_sni.reference +++ b/tests/queries/0_stateless/01532_client_sni.reference @@ -1 +1 @@ -\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75 +\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x6c\x6f\x63\x61\x6c\x68\x6f\x73\x74 diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index 8a62cccf381..92c2f6be233 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -5,16 +5,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Check that ClickHouse properly uses SNI extension in Client Hello packet in HTTPS connection. -sudo bash -c 'echo "127.0.0.1 yandex.ru" >> /etc/hosts' - -echo -ne 'y\r\n' | strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://yandex.ru:${CLICKHOUSE_PORT_HTTPS}/', RawBLOB, 'data String')" 2>&1 | - grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x79\x61\x6e\x64\x65\x78\x2e\x72\x75' +echo -ne 'y\r\n' | strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://localhost:${CLICKHOUSE_PORT_HTTPS}/', RawBLOB, 'data String')" 2>&1 | + grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x6c\x6f\x63\x61\x6c\x68\x6f\x73\x74' # ^^^^^^^^ ^^^^^^^ ^^^^^^^ ^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # | | | | | -# server name data | hostname | y a n d e x . 
r u +# server name data | hostname | l o c a l h o s t # extension id len: 14 | type | # | | # hostnames list hostname # len, 12 len, 9 - -sudo bash -c 'sed -i.bak "/yandex\.ru/d" /etc/hosts' From fb5d281c69d3e546545e2fcaaca431f98f0cb2a8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Oct 2020 05:00:30 +0300 Subject: [PATCH 12/92] Better test --- tests/queries/0_stateless/01532_client_sni.reference | 2 +- tests/queries/0_stateless/01532_client_sni.sh | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/01532_client_sni.reference b/tests/queries/0_stateless/01532_client_sni.reference index 4bf62e12856..879b5791c7b 100644 --- a/tests/queries/0_stateless/01532_client_sni.reference +++ b/tests/queries/0_stateless/01532_client_sni.reference @@ -1 +1 @@ -\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x6c\x6f\x63\x61\x6c\x68\x6f\x73\x74 +0000000e000c0000096c6f63616c686f7374 diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index 92c2f6be233..b3f2a12bf22 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -5,8 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Check that ClickHouse properly uses SNI extension in Client Hello packet in HTTPS connection. 
-echo -ne 'y\r\n' | strace -f -x -s10000 -e trace=write,sendto ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://localhost:${CLICKHOUSE_PORT_HTTPS}/', RawBLOB, 'data String')" 2>&1 | - grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x6c\x6f\x63\x61\x6c\x68\x6f\x73\x74' +nc -q0 -l 5678 | xxd -p | grep -oF $'0000000e000c0000096c6f63616c686f7374' & + +${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://localhost:5678/', RawBLOB, 'data String')" 2>&1 | grep -v -F 'Timeout' + +# grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x6c\x6f\x63\x61\x6c\x68\x6f\x73\x74' # ^^^^^^^^ ^^^^^^^ ^^^^^^^ ^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # | | | | | # server name data | hostname | l o c a l h o s t From 7a65ef8c6704f5862ecfa9efa453bd5338572a1b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 25 Oct 2020 05:01:04 +0300 Subject: [PATCH 13/92] Better test --- tests/queries/0_stateless/01532_client_sni.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh index b3f2a12bf22..0b122713fde 100755 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ b/tests/queries/0_stateless/01532_client_sni.sh @@ -17,3 +17,5 @@ ${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://localhost:5678/', RawBLO # | | # hostnames list hostname # len, 12 len, 9 + +wait From 2e0a979e3a2aaae76a4714ef120d4319752fa86a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Oct 2020 06:39:43 +0300 Subject: [PATCH 14/92] Fix inconsistency in FormatFactory --- programs/client/Client.cpp | 2 + programs/copier/ClusterCopierApp.cpp | 2 + programs/local/LocalServer.cpp | 2 + programs/odbc-bridge/ODBCBridge.cpp | 4 + programs/server/Server.cpp | 2 + src/Formats/FormatFactory.cpp | 144 ------------------------ src/Formats/FormatFactory.h | 3 - src/Formats/registerFormats.cpp | 160 +++++++++++++++++++++++++++ src/Formats/registerFormats.h | 9 ++ src/Formats/ya.make | 1 + 10 files changed, 182 
insertions(+), 147 deletions(-) create mode 100644 src/Formats/registerFormats.cpp create mode 100644 src/Formats/registerFormats.h diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 808a505b5e4..4ead4b3b41a 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -461,6 +462,7 @@ private: { UseSSL use_ssl; + registerFormats(); registerFunctions(); registerAggregateFunctions(); diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index 08a7e50a9d7..c2946e12c34 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -1,6 +1,7 @@ #include "ClusterCopierApp.h" #include #include +#include #include @@ -122,6 +123,7 @@ void ClusterCopierApp::mainImpl() registerStorages(); registerDictionaries(); registerDisks(); + registerFormats(); static const std::string default_database = "_local"; DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, *context)); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b9dde555788..9ecc2a50a42 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -224,6 +225,7 @@ try registerStorages(); registerDictionaries(); registerDisks(); + registerFormats(); /// Maybe useless if (config().has("macros")) diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 3f119fbf7ba..24aa8e32ddb 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -18,11 +18,13 @@ #include #include #include +#include #include #include #include #include + namespace DB { namespace ErrorCodes @@ -160,6 +162,8 @@ int ODBCBridge::main(const std::vector & /*args*/) if (is_help) return Application::EXIT_OK; + registerFormats(); + 
LOG_INFO(log, "Starting up"); Poco::Net::ServerSocket socket; auto address = socketBindListen(socket, hostname, port, log); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index b85cb5e75f2..ed18793a537 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -266,6 +267,7 @@ int Server::main(const std::vector & /*args*/) registerStorages(); registerDictionaries(); registerDisks(); + registerFormats(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 4dc5b816420..9e04d717949 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -333,150 +333,6 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm target = std::move(file_segmentation_engine); } -/// File Segmentation Engines for parallel reading - -void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); -void registerFileSegmentationEngineCSV(FormatFactory & factory); -void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); -void registerFileSegmentationEngineRegexp(FormatFactory & factory); -void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); -void registerFileSegmentationEngineLineAsString(FormatFactory & factory); - -/// Formats for both input/output. 
- -void registerInputFormatNative(FormatFactory & factory); -void registerOutputFormatNative(FormatFactory & factory); - -void registerInputFormatProcessorNative(FormatFactory & factory); -void registerOutputFormatProcessorNative(FormatFactory & factory); -void registerInputFormatProcessorRowBinary(FormatFactory & factory); -void registerOutputFormatProcessorRowBinary(FormatFactory & factory); -void registerInputFormatProcessorTabSeparated(FormatFactory & factory); -void registerOutputFormatProcessorTabSeparated(FormatFactory & factory); -void registerInputFormatProcessorValues(FormatFactory & factory); -void registerOutputFormatProcessorValues(FormatFactory & factory); -void registerInputFormatProcessorCSV(FormatFactory & factory); -void registerOutputFormatProcessorCSV(FormatFactory & factory); -void registerInputFormatProcessorTSKV(FormatFactory & factory); -void registerOutputFormatProcessorTSKV(FormatFactory & factory); -void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); -void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); -void registerInputFormatProcessorProtobuf(FormatFactory & factory); -void registerOutputFormatProcessorProtobuf(FormatFactory & factory); -void registerInputFormatProcessorTemplate(FormatFactory & factory); -void registerOutputFormatProcessorTemplate(FormatFactory & factory); -void registerInputFormatProcessorMsgPack(FormatFactory & factory); -void registerOutputFormatProcessorMsgPack(FormatFactory & factory); -void registerInputFormatProcessorORC(FormatFactory & factory); -void registerOutputFormatProcessorORC(FormatFactory & factory); -void registerInputFormatProcessorParquet(FormatFactory & factory); -void registerOutputFormatProcessorParquet(FormatFactory & factory); -void registerInputFormatProcessorArrow(FormatFactory & factory); -void 
registerOutputFormatProcessorArrow(FormatFactory & factory); -void registerInputFormatProcessorAvro(FormatFactory & factory); -void registerOutputFormatProcessorAvro(FormatFactory & factory); -void registerInputFormatProcessorRawBLOB(FormatFactory & factory); -void registerOutputFormatProcessorRawBLOB(FormatFactory & factory); - -/// Output only (presentational) formats. - -void registerOutputFormatNull(FormatFactory & factory); - -void registerOutputFormatProcessorPretty(FormatFactory & factory); -void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory); -void registerOutputFormatProcessorPrettySpace(FormatFactory & factory); -void registerOutputFormatProcessorVertical(FormatFactory & factory); -void registerOutputFormatProcessorJSON(FormatFactory & factory); -void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); -void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); -void registerOutputFormatProcessorXML(FormatFactory & factory); -void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); -void registerOutputFormatProcessorNull(FormatFactory & factory); -void registerOutputFormatProcessorMySQLWire(FormatFactory & factory); -void registerOutputFormatProcessorMarkdown(FormatFactory & factory); -void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); - -/// Input only formats. 
- -void registerInputFormatProcessorRegexp(FormatFactory & factory); -void registerInputFormatProcessorJSONAsString(FormatFactory & factory); -void registerInputFormatProcessorLineAsString(FormatFactory & factory); -void registerInputFormatProcessorCapnProto(FormatFactory & factory); - -FormatFactory::FormatFactory() -{ - registerFileSegmentationEngineTabSeparated(*this); - registerFileSegmentationEngineCSV(*this); - registerFileSegmentationEngineJSONEachRow(*this); - registerFileSegmentationEngineRegexp(*this); - registerFileSegmentationEngineJSONAsString(*this); - registerFileSegmentationEngineLineAsString(*this); - - registerInputFormatNative(*this); - registerOutputFormatNative(*this); - - registerInputFormatProcessorNative(*this); - registerOutputFormatProcessorNative(*this); - registerInputFormatProcessorRowBinary(*this); - registerOutputFormatProcessorRowBinary(*this); - registerInputFormatProcessorTabSeparated(*this); - registerOutputFormatProcessorTabSeparated(*this); - registerInputFormatProcessorValues(*this); - registerOutputFormatProcessorValues(*this); - registerInputFormatProcessorCSV(*this); - registerOutputFormatProcessorCSV(*this); - registerInputFormatProcessorTSKV(*this); - registerOutputFormatProcessorTSKV(*this); - registerInputFormatProcessorJSONEachRow(*this); - registerOutputFormatProcessorJSONEachRow(*this); - registerInputFormatProcessorJSONCompactEachRow(*this); - registerOutputFormatProcessorJSONCompactEachRow(*this); - registerInputFormatProcessorProtobuf(*this); - registerOutputFormatProcessorProtobuf(*this); - registerInputFormatProcessorTemplate(*this); - registerOutputFormatProcessorTemplate(*this); - registerInputFormatProcessorMsgPack(*this); - registerOutputFormatProcessorMsgPack(*this); - registerInputFormatProcessorRawBLOB(*this); - registerOutputFormatProcessorRawBLOB(*this); - -#if !defined(ARCADIA_BUILD) - registerInputFormatProcessorORC(*this); - registerOutputFormatProcessorORC(*this); - 
registerInputFormatProcessorParquet(*this); - registerOutputFormatProcessorParquet(*this); - registerInputFormatProcessorArrow(*this); - registerOutputFormatProcessorArrow(*this); - registerInputFormatProcessorAvro(*this); - registerOutputFormatProcessorAvro(*this); -#endif - - registerOutputFormatNull(*this); - - registerOutputFormatProcessorPretty(*this); - registerOutputFormatProcessorPrettyCompact(*this); - registerOutputFormatProcessorPrettySpace(*this); - registerOutputFormatProcessorVertical(*this); - registerOutputFormatProcessorJSON(*this); - registerOutputFormatProcessorJSONCompact(*this); - registerOutputFormatProcessorJSONEachRowWithProgress(*this); - registerOutputFormatProcessorXML(*this); - registerOutputFormatProcessorODBCDriver2(*this); - registerOutputFormatProcessorNull(*this); - registerOutputFormatProcessorMySQLWire(*this); - registerOutputFormatProcessorMarkdown(*this); - registerOutputFormatProcessorPostgreSQLWire(*this); - - registerInputFormatProcessorRegexp(*this); - registerInputFormatProcessorJSONAsString(*this); - registerInputFormatProcessorLineAsString(*this); - -#if !defined(ARCADIA_BUILD) - registerInputFormatProcessorCapnProto(*this); -#endif -} FormatFactory & FormatFactory::instance() { diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index d49414e3944..dbf6a3d65b2 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -96,7 +96,6 @@ private: using FormatsDictionary = std::unordered_map; public: - static FormatFactory & instance(); BlockInputStreamPtr getInput( @@ -137,8 +136,6 @@ public: private: FormatsDictionary dict; - FormatFactory(); - const Creators & getCreators(const String & name) const; }; diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp new file mode 100644 index 00000000000..96b2c4ee384 --- /dev/null +++ b/src/Formats/registerFormats.cpp @@ -0,0 +1,160 @@ +#if !defined(ARCADIA_BUILD) +# include +#endif + +#include + + +namespace DB +{ + +/// 
File Segmentation Engines for parallel reading + +void registerFileSegmentationEngineTabSeparated(FormatFactory & factory); +void registerFileSegmentationEngineCSV(FormatFactory & factory); +void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory); +void registerFileSegmentationEngineRegexp(FormatFactory & factory); +void registerFileSegmentationEngineJSONAsString(FormatFactory & factory); +void registerFileSegmentationEngineLineAsString(FormatFactory & factory); + +/// Formats for both input/output. + +void registerInputFormatNative(FormatFactory & factory); +void registerOutputFormatNative(FormatFactory & factory); + +void registerInputFormatProcessorNative(FormatFactory & factory); +void registerOutputFormatProcessorNative(FormatFactory & factory); +void registerInputFormatProcessorRowBinary(FormatFactory & factory); +void registerOutputFormatProcessorRowBinary(FormatFactory & factory); +void registerInputFormatProcessorTabSeparated(FormatFactory & factory); +void registerOutputFormatProcessorTabSeparated(FormatFactory & factory); +void registerInputFormatProcessorValues(FormatFactory & factory); +void registerOutputFormatProcessorValues(FormatFactory & factory); +void registerInputFormatProcessorCSV(FormatFactory & factory); +void registerOutputFormatProcessorCSV(FormatFactory & factory); +void registerInputFormatProcessorTSKV(FormatFactory & factory); +void registerOutputFormatProcessorTSKV(FormatFactory & factory); +void registerInputFormatProcessorJSONEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory); +void registerInputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerOutputFormatProcessorJSONCompactEachRow(FormatFactory & factory); +void registerInputFormatProcessorProtobuf(FormatFactory & factory); +void registerOutputFormatProcessorProtobuf(FormatFactory & factory); +void registerInputFormatProcessorTemplate(FormatFactory & factory); +void 
registerOutputFormatProcessorTemplate(FormatFactory & factory); +void registerInputFormatProcessorMsgPack(FormatFactory & factory); +void registerOutputFormatProcessorMsgPack(FormatFactory & factory); +void registerInputFormatProcessorORC(FormatFactory & factory); +void registerOutputFormatProcessorORC(FormatFactory & factory); +void registerInputFormatProcessorParquet(FormatFactory & factory); +void registerOutputFormatProcessorParquet(FormatFactory & factory); +void registerInputFormatProcessorArrow(FormatFactory & factory); +void registerOutputFormatProcessorArrow(FormatFactory & factory); +void registerInputFormatProcessorAvro(FormatFactory & factory); +void registerOutputFormatProcessorAvro(FormatFactory & factory); +void registerInputFormatProcessorRawBLOB(FormatFactory & factory); +void registerOutputFormatProcessorRawBLOB(FormatFactory & factory); + +/// Output only (presentational) formats. + +void registerOutputFormatNull(FormatFactory & factory); + +void registerOutputFormatProcessorPretty(FormatFactory & factory); +void registerOutputFormatProcessorPrettyCompact(FormatFactory & factory); +void registerOutputFormatProcessorPrettySpace(FormatFactory & factory); +void registerOutputFormatProcessorVertical(FormatFactory & factory); +void registerOutputFormatProcessorJSON(FormatFactory & factory); +void registerOutputFormatProcessorJSONCompact(FormatFactory & factory); +void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factory); +void registerOutputFormatProcessorXML(FormatFactory & factory); +void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory); +void registerOutputFormatProcessorNull(FormatFactory & factory); +void registerOutputFormatProcessorMySQLWire(FormatFactory & factory); +void registerOutputFormatProcessorMarkdown(FormatFactory & factory); +void registerOutputFormatProcessorPostgreSQLWire(FormatFactory & factory); + +/// Input only formats. 
+ +void registerInputFormatProcessorRegexp(FormatFactory & factory); +void registerInputFormatProcessorJSONAsString(FormatFactory & factory); +void registerInputFormatProcessorLineAsString(FormatFactory & factory); +void registerInputFormatProcessorCapnProto(FormatFactory & factory); + + +void registerFormats() +{ + auto & factory = FormatFactory::instance(); + + registerFileSegmentationEngineTabSeparated(factory); + registerFileSegmentationEngineCSV(factory); + registerFileSegmentationEngineJSONEachRow(factory); + registerFileSegmentationEngineRegexp(factory); + registerFileSegmentationEngineJSONAsString(factory); + registerFileSegmentationEngineLineAsString(factory); + + registerInputFormatNative(factory); + registerOutputFormatNative(factory); + + registerInputFormatProcessorNative(factory); + registerOutputFormatProcessorNative(factory); + registerInputFormatProcessorRowBinary(factory); + registerOutputFormatProcessorRowBinary(factory); + registerInputFormatProcessorTabSeparated(factory); + registerOutputFormatProcessorTabSeparated(factory); + registerInputFormatProcessorValues(factory); + registerOutputFormatProcessorValues(factory); + registerInputFormatProcessorCSV(factory); + registerOutputFormatProcessorCSV(factory); + registerInputFormatProcessorTSKV(factory); + registerOutputFormatProcessorTSKV(factory); + registerInputFormatProcessorJSONEachRow(factory); + registerOutputFormatProcessorJSONEachRow(factory); + registerInputFormatProcessorJSONCompactEachRow(factory); + registerOutputFormatProcessorJSONCompactEachRow(factory); + registerInputFormatProcessorProtobuf(factory); + registerOutputFormatProcessorProtobuf(factory); + registerInputFormatProcessorTemplate(factory); + registerOutputFormatProcessorTemplate(factory); + registerInputFormatProcessorMsgPack(factory); + registerOutputFormatProcessorMsgPack(factory); + registerInputFormatProcessorRawBLOB(factory); + registerOutputFormatProcessorRawBLOB(factory); + +#if !defined(ARCADIA_BUILD) + 
registerInputFormatProcessorORC(factory); + registerOutputFormatProcessorORC(factory); + registerInputFormatProcessorParquet(factory); + registerOutputFormatProcessorParquet(factory); + registerInputFormatProcessorArrow(factory); + registerOutputFormatProcessorArrow(factory); + registerInputFormatProcessorAvro(factory); + registerOutputFormatProcessorAvro(factory); +#endif + + registerOutputFormatNull(factory); + + registerOutputFormatProcessorPretty(factory); + registerOutputFormatProcessorPrettyCompact(factory); + registerOutputFormatProcessorPrettySpace(factory); + registerOutputFormatProcessorVertical(factory); + registerOutputFormatProcessorJSON(factory); + registerOutputFormatProcessorJSONCompact(factory); + registerOutputFormatProcessorJSONEachRowWithProgress(factory); + registerOutputFormatProcessorXML(factory); + registerOutputFormatProcessorODBCDriver2(factory); + registerOutputFormatProcessorNull(factory); + registerOutputFormatProcessorMySQLWire(factory); + registerOutputFormatProcessorMarkdown(factory); + registerOutputFormatProcessorPostgreSQLWire(factory); + + registerInputFormatProcessorRegexp(factory); + registerInputFormatProcessorJSONAsString(factory); + registerInputFormatProcessorLineAsString(factory); + +#if !defined(ARCADIA_BUILD) + registerInputFormatProcessorCapnProto(factory); +#endif +} + +} + diff --git a/src/Formats/registerFormats.h b/src/Formats/registerFormats.h new file mode 100644 index 00000000000..e4ff79248d0 --- /dev/null +++ b/src/Formats/registerFormats.h @@ -0,0 +1,9 @@ +#pragma once + +namespace DB +{ + +void registerFormats(); + +} + diff --git a/src/Formats/ya.make b/src/Formats/ya.make index b4f7b073e21..fb6c5cf8cf1 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -23,6 +23,7 @@ SRCS( ProtobufReader.cpp ProtobufSchemas.cpp ProtobufWriter.cpp + registerFormats.cpp verbosePrintString.cpp ) From a7b5db912c44a95478fd1a3f6b7d52a0c2ff5f58 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Oct 2020 
06:42:49 +0300 Subject: [PATCH 15/92] odbc-bridge: two libraries less --- programs/odbc-bridge/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 8cfa110adad..ae60bcd5f0b 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,8 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES ) set (CLICKHOUSE_ODBC_BRIDGE_LINK PRIVATE - clickhouse_parsers - clickhouse_aggregate_functions daemon dbms Poco::Data From 0440958c3ecb47066328264a0bf67a11a6198da2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Oct 2020 07:27:12 +0300 Subject: [PATCH 16/92] Add missing modification --- programs/obfuscator/Obfuscator.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 756aab0a574..b326d550fec 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -1047,6 +1048,8 @@ public: int mainEntryClickHouseObfuscator(int argc, char ** argv) try { + registerFormats(); + using namespace DB; namespace po = boost::program_options; From f2bdfcef41f6c2ff261f7040e128a8d7dedea80c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Oct 2020 08:18:42 +0300 Subject: [PATCH 17/92] Add missing modification --- programs/obfuscator/Obfuscator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index b326d550fec..d0d7f201c68 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1048,11 +1048,11 @@ public: int mainEntryClickHouseObfuscator(int argc, char ** argv) try { - registerFormats(); - using namespace DB; namespace po = boost::program_options; + registerFormats(); + po::options_description description = 
createOptionsDescription("Options", getTerminalWidth()); description.add_options() ("help", "produce help message") From 1520b6aa2aa5833637ea45fab55ce3e0b57caf60 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Oct 2020 08:27:55 +0300 Subject: [PATCH 18/92] Take out odbc-bridge from clickhouse bundle --- programs/CMakeLists.txt | 3 --- programs/odbc-bridge/CMakeLists.txt | 23 ++++++++++------------- programs/odbc-bridge/tests/CMakeLists.txt | 3 +-- 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 3577ee3df31..3817bc62bcb 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -227,9 +227,6 @@ else () install (FILES ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) endif () - if(ENABLE_CLICKHOUSE_ODBC_BRIDGE) - list(APPEND CLICKHOUSE_BUNDLE clickhouse-odbc-bridge) - endif() install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index ae60bcd5f0b..043c0d5dd7d 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -10,17 +10,8 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES PingHandler.cpp SchemaAllowedHandler.cpp validateODBCConnectionString.cpp + odbc-bridge.cpp ) -set (CLICKHOUSE_ODBC_BRIDGE_LINK - PRIVATE - daemon - dbms - Poco::Data - PUBLIC - Poco::Data::ODBC -) - -clickhouse_program_add_library(odbc-bridge) if (OS_LINUX) # clickhouse-odbc-bridge is always a separate binary. @@ -28,10 +19,16 @@ if (OS_LINUX) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") endif () -add_executable(clickhouse-odbc-bridge odbc-bridge.cpp) -set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
+add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) -clickhouse_program_link_split_binary(odbc-bridge) +target_link_libraries(clickhouse-odbc-bridge PRIVATE + daemon + clickhouse_common_io + Poco::Data + Poco::Data::ODBC +) + +set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) diff --git a/programs/odbc-bridge/tests/CMakeLists.txt b/programs/odbc-bridge/tests/CMakeLists.txt index 60e7afab969..3e0af8c940f 100644 --- a/programs/odbc-bridge/tests/CMakeLists.txt +++ b/programs/odbc-bridge/tests/CMakeLists.txt @@ -1,3 +1,2 @@ -add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp) -clickhouse_target_link_split_lib(validate-odbc-connection-string odbc-bridge) +add_executable (validate-odbc-connection-string validate-odbc-connection-string.cpp ../validateODBCConnectionString.cpp) target_link_libraries (validate-odbc-connection-string PRIVATE clickhouse_common_io) From 2892252b3695080e73e9326ff505e170c21b2386 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 22 Oct 2020 23:23:44 +0300 Subject: [PATCH 19/92] Support collate in LowCardinality(String) and Nullable(String) and refactor ColumnString --- src/Columns/ColumnLowCardinality.cpp | 78 ++++-- src/Columns/ColumnLowCardinality.h | 9 + src/Columns/ColumnNullable.cpp | 51 +++- src/Columns/ColumnNullable.h | 9 + src/Columns/ColumnString.cpp | 230 +++++------------- src/Columns/ColumnString.h | 16 +- src/Interpreters/sortBlock.cpp | 57 ++++- ...01532_collate_in_low_cardinality.reference | 28 +++ .../01532_collate_in_low_cardinality.sql | 18 ++ .../01533_collate_in_nullable.reference | 36 +++ .../0_stateless/01533_collate_in_nullable.sql | 18 ++ 11 files changed, 342 insertions(+), 208 deletions(-) create mode 100644 
tests/queries/0_stateless/01532_collate_in_low_cardinality.reference create mode 100644 tests/queries/0_stateless/01532_collate_in_low_cardinality.sql create mode 100644 tests/queries/0_stateless/01533_collate_in_nullable.reference create mode 100644 tests/queries/0_stateless/01533_collate_in_nullable.sql diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 64b503ed325..2e941a3ef8a 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -14,6 +15,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; + extern const int BAD_COLLATION; } namespace @@ -295,14 +297,24 @@ void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num compare_results, direction, nan_direction_hint); } -void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator) const { if (limit == 0) limit = size(); size_t unique_limit = getDictionary().size(); Permutation unique_perm; - getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); + if (collator) + { + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + column_string->getPermutationWithCollation(*collator, reverse, unique_limit, unique_perm); + } + else + getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); /// TODO: optimize with sse. 
@@ -330,7 +342,8 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di } } -void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +template +void ColumnLowCardinality::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const { if (equal_ranges.empty()) return; @@ -345,20 +358,17 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan EqualRanges new_ranges; SCOPE_EXIT({equal_ranges = std::move(new_ranges);}); + auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + for (size_t i = 0; i < number_of_ranges; ++i) { const auto& [first, last] = equal_ranges[i]; - if (reverse) - std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); - else - std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + std::sort(res.begin() + first, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < last; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + if (comparator(res[new_first], res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -379,17 +389,12 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan /// Since then we are working inside the interval. 
- if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) - {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) { - if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) != 0) + if (comparator(res[new_first],res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -401,7 +406,7 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan auto new_last = limit; for (auto j = limit; j < last; ++j) { - if (getDictionary().compareAt(getIndexes().getUInt(res[new_first]), getIndexes().getUInt(res[j]), getDictionary(), nan_direction_hint) == 0) + if (comparator(res[new_first], res[j]) == 0) { std::swap(res[new_last], res[j]); ++new_last; @@ -412,6 +417,43 @@ void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan } } +void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, nan_direction_hint, res); +} + +void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + auto comparator = [this, nan_direction_hint, reverse](size_t lhs, size_t rhs) + { + int ret = getDictionary().compareAt(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), 
getDictionary(), nan_direction_hint); + return reverse ? -ret : ret; + }; + + updatePermutationImpl(limit, res, equal_ranges, comparator); +} + +void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator); +} + +void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +{ + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + auto comparator = [this, &column_string, &collator, reverse](size_t lhs, size_t rhs) + { + int ret = column_string->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *column_string, collator); + return reverse ? -ret : ret; + }; + + updatePermutationImpl(limit, res, equal_ranges, comparator); +} + std::vector ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const { auto columns = getIndexes().scatter(num_columns, selector); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 0aeda4567fd..e45449873fc 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -31,6 +31,11 @@ class ColumnLowCardinality final : public COWHelper + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; + public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. 
@@ -129,6 +134,10 @@ public: void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const; + + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + ColumnPtr replicate(const Offsets & offsets) const override { return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets)); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 51248a598af..63b86f38342 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -17,6 +18,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; + extern const int BAD_COLLATION; } @@ -256,10 +258,21 @@ void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const { /// Cannot pass limit because of unknown amount of NULLs. 
- getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); + + if (collator) + { + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(&getNestedColumn()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + column_string->getPermutationWithCollation(*collator, reverse, 0, res); + } + else + getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); if ((null_direction_hint > 0) != reverse) { @@ -329,7 +342,7 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi } } -void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const { if (equal_ranges.empty()) return; @@ -432,12 +445,42 @@ void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_dire } } - getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); + if (collator) + { + /// Collations are supported only for ColumnString + const ColumnString * column_string = checkAndGetColumn(getNestedColumn()); + if (!column_string) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + + column_string->updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); + } + else + getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); equal_ranges = std::move(new_ranges); std::move(null_ranges.begin(), null_ranges.end(), std::back_inserter(equal_ranges)); } +void ColumnNullable::getPermutation(bool reverse, size_t limit, 
int null_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, null_direction_hint, res); +} + +void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_ranges); +} + +void ColumnNullable::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const +{ + getPermutationImpl(reverse, limit, null_direction_hint, res, &collator); +} + +void ColumnNullable::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + updatePermutationImpl(reverse, limit, null_direction_hint, res, equal_range, &collator); +} + void ColumnNullable::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index e4033e22737..3d7a7970bd3 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -6,6 +6,7 @@ #include #include +class Collator; namespace DB { @@ -30,6 +31,11 @@ private: ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_); ColumnNullable(const ColumnNullable &) = default; + void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; + public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. 
@@ -94,6 +100,9 @@ public: int direction, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const; + void updatePermutationWithCollation( + const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index cd06ea20f83..27dd9e54685 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -284,22 +284,23 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } -template -struct ColumnString::less +struct ColumnString::cmp { const ColumnString & parent; - explicit less(const ColumnString & parent_) : parent(parent_) {} - bool operator()(size_t lhs, size_t rhs) const + bool reverse; + cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1, parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1); - return positive ? (res < 0) : (res > 0); + return reverse ? 
-res : res; } }; -void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const +template +void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const { size_t s = offsets.size(); res.resize(s); @@ -309,23 +310,16 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio if (limit >= s) limit = 0; + auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), less(*this)); - } + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - { - if (reverse) - std::sort(res.begin(), res.end(), less(*this)); - else - std::sort(res.begin(), res.end(), less(*this)); - } + std::sort(res.begin(), res.end(), less); } -void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const +template +void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const { if (equal_ranges.empty()) return; @@ -340,21 +334,17 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc if (limit) --number_of_ranges; + auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + for (size_t i = 0; i < number_of_ranges; ++i) { const auto & [first, last] = equal_ranges[i]; - - if (reverse) - std::sort(res.begin() + first, res.begin() + last, less(*this)); - else - std::sort(res.begin() + first, res.begin() + last, less(*this)); + std::sort(res.begin() + first, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < last; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + 
offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + if (comparator(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -375,17 +365,12 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc /// Since then we are working inside the interval. - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + if (comparator(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -395,9 +380,7 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc size_t new_last = limit; for (size_t j = limit; j < last; ++j) { - if (memcmpSmallAllowOverflow15( - chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, - chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0) + if (comparator(res[j], res[new_first]) == 0) { std::swap(res[j], res[new_last]); ++new_last; @@ -408,6 +391,45 @@ void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direc } } +void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const +{ + getPermutationImpl(limit, res, cmp(*this, reverse)); +} + +void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(limit, res, equal_ranges, cmp(*this, reverse)); +} + +struct ColumnString::cmpWithCollation +{ + const ColumnString & parent; + const 
Collator & collator; + bool reverse; + + cmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} + + int operator()(size_t lhs, size_t rhs) const + { + int res = collator.compare( + reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), + reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); + + return reverse ? -res : res; + } +}; + +void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const +{ + getPermutationImpl(limit, res, cmpWithCollation(*this, collator, reverse)); +} + +void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(limit, res, equal_ranges, cmpWithCollation(*this, collator, reverse)); +} + + ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const { size_t col_size = size(); @@ -476,13 +498,13 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - less less_op(*this); + cmp cmp_op(*this); for (size_t i = 1; i < col_size; ++i) { - if (less_op(i, min_idx)) + if (cmp_op(i, min_idx) < 0) min_idx = i; - else if (less_op(max_idx, i)) + else if (cmp_op(max_idx, i) < 0) max_idx = i; } @@ -500,134 +522,6 @@ int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs reinterpret_cast(&rhs.chars[rhs.offsetAt(m)]), rhs.sizeAt(m)); } - -template -struct ColumnString::lessWithCollation -{ - const ColumnString & parent; - const Collator & collator; - - lessWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {} - - bool operator()(size_t lhs, size_t rhs) const - { - int res = collator.compare( - reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), - 
reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); - - return positive ? (res < 0) : (res > 0); - } -}; - -void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const -{ - size_t s = offsets.size(); - res.resize(s); - for (size_t i = 0; i < s; ++i) - res[i] = i; - - if (limit >= s) - limit = 0; - - if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), lessWithCollation(*this, collator)); - } - else - { - if (reverse) - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - else - std::sort(res.begin(), res.end(), lessWithCollation(*this, collator)); - } -} - -void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const -{ - if (equal_ranges.empty()) - return; - - if (limit >= size() || limit >= equal_ranges.back().second) - limit = 0; - - size_t number_of_ranges = equal_ranges.size(); - if (limit) - --number_of_ranges; - - EqualRanges new_ranges; - SCOPE_EXIT({equal_ranges = std::move(new_ranges);}); - - for (size_t i = 0; i < number_of_ranges; ++i) - { - const auto& [first, last] = equal_ranges[i]; - - if (reverse) - std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); - else - std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); - auto new_first = first; - for (auto j = first + 1; j < last; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) - { - if (j - new_first > 1) - new_ranges.emplace_back(new_first, j); - - new_first = j; - } - } - if (last - new_first > 1) - new_ranges.emplace_back(new_first, last); - } - - 
if (limit) - { - const auto & [first, last] = equal_ranges.back(); - - if (limit < first || limit > last) - return; - - /// Since then we are working inside the interval. - - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); - - auto new_first = first; - for (auto j = first + 1; j < limit; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) - { - if (j - new_first > 1) - new_ranges.emplace_back(new_first, j); - - new_first = j; - } - } - auto new_last = limit; - for (auto j = limit; j < last; ++j) - { - if (collator.compare( - reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), - reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0) - { - std::swap(res[new_last], res[j]); - ++new_last; - } - } - if (new_last - new_first > 1) - new_ranges.emplace_back(new_first, new_last); - } -} - void ColumnString::protect() { getChars().protect(); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 19398e07b83..c91d982f126 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -42,15 +42,19 @@ private: /// Size of i-th element, including terminating zero. 
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } - template - struct less; + struct cmp; - template - struct lessWithCollation; + struct cmpWithCollation; ColumnString() = default; ColumnString(const ColumnString & src); + template + void getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; + public: const char * getFamilyName() const override { return "String"; } TypeIndex getDataType() const override { return TypeIndex::String; } @@ -233,12 +237,12 @@ public: void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; + void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; /// Sorting with respect of collation. 
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const; ColumnPtr replicate(const Offsets & replicate_offsets) const override; diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index c2436806fcd..5d114c746e5 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include #include @@ -20,6 +22,24 @@ static bool isCollationRequired(const SortColumnDescription & description) return description.collator != nullptr; } +static bool isCollationSupported(const IColumn * column) +{ + if (column->getDataType() == TypeIndex::String) + return true; + + if (column->getDataType() == TypeIndex::Nullable) + { + const ColumnNullable * column_nullable = assert_cast(column); + return isCollationSupported(&column_nullable->getNestedColumn()); + } + + if (column->getDataType() == TypeIndex::LowCardinality) + { + const ColumnLowCardinality * column_low_cardinality = assert_cast(column); + return isCollationSupported(column_low_cardinality->getDictionary().getNestedColumn().get()); + } + return false; +} ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description) { @@ -101,7 +121,6 @@ struct PartialSortingLessWithCollation } }; - void sortBlock(Block & block, const SortDescription & description, UInt64 limit) { if (!block) @@ -120,14 +139,18 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) bool is_column_const = false; if (isCollationRequired(description[0])) { - /// it it's real string column, than we need sort + /// Check if column 
supports collations + if (!isCollationSupported(column)) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + if (const ColumnString * column_string = checkAndGetColumn(column)) column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm); - else if (checkAndGetColumnConstData(column)) + else if (const ColumnNullable * column_nullable = checkAndGetColumn(column)) + column_nullable->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); + else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column)) + column_low_cardinality->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); + else if (isColumnConst(*column)) is_column_const = true; - else - throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); - } else if (!isColumnConst(*column)) { @@ -163,8 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) const IColumn * column = columns_with_sort_desc[i].column; if (isCollationRequired(description[i])) { - if (!checkAndGetColumn(column) && !checkAndGetColumnConstData(column)) - throw Exception("Collations could be specified only for String columns.", ErrorCodes::BAD_COLLATION); + if (!isCollationSupported(column)) + throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); need_collation = true; } @@ -187,10 +210,20 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (isCollationRequired(column.description)) { - const ColumnString & column_string = assert_cast(*column.column); - column_string.updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, 
column.description.nulls_direction, perm, ranges); + if (const ColumnString * column_string = checkAndGetColumn(column.column)) + column_string->updatePermutationWithCollation( + *column.description.collator, + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + + else if (const ColumnNullable * column_nullable = checkAndGetColumn(column.column)) + column_nullable->updatePermutationWithCollation( + *column.description.collator, + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + + else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column.column)) + column_low_cardinality->updatePermutationWithCollation( + *column.description.collator, + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); } else { diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference new file mode 100644 index 00000000000..b7a4830f9cf --- /dev/null +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference @@ -0,0 +1,28 @@ +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +Order by tuple without collate +1 Ё +1 а +1 ё +2 А +2 Я +2 я +Order by tuple with collate +1 а +1 ё +1 Ё +2 А +2 я +2 Я diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql new file mode 100644 index 00000000000..0f4194ee671 --- /dev/null +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test_collate; + +CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory(); + +INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'); + +SELECT 'Order by without collate'; +SELECT * FROM test_collate ORDER BY s; +SELECT 
'Order by with collate'; +SELECT * FROM test_collate ORDER BY s COLLATE 'ru'; + +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; + +DROP TABLE test_collate; + diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.reference b/tests/queries/0_stateless/01533_collate_in_nullable.reference new file mode 100644 index 00000000000..6bb06cbc8b5 --- /dev/null +++ b/tests/queries/0_stateless/01533_collate_in_nullable.reference @@ -0,0 +1,36 @@ +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +1 \N +2 \N +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +1 \N +2 \N +Order by tuple without collate +1 Ё +1 а +1 ё +1 \N +2 А +2 Я +2 я +2 \N +Order by tuple with collate +1 а +1 ё +1 Ё +1 \N +2 А +2 я +2 Я +2 \N diff --git a/tests/queries/0_stateless/01533_collate_in_nullable.sql b/tests/queries/0_stateless/01533_collate_in_nullable.sql new file mode 100644 index 00000000000..40b48bee465 --- /dev/null +++ b/tests/queries/0_stateless/01533_collate_in_nullable.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS test_collate; + +CREATE TABLE test_collate (x UInt32, s Nullable(String)) ENGINE=Memory(); + +INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (1, null), (2, 'А'), (2, 'я'), (2, 'Я'), (2, null); + +SELECT 'Order by without collate'; +SELECT * FROM test_collate ORDER BY s; +SELECT 'Order by with collate'; +SELECT * FROM test_collate ORDER BY s COLLATE 'ru'; + +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; + +DROP TABLE test_collate; + From 18d8d5d81f5844f77ba3bac972dc7f893fc014d7 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Fri, 23 Oct 2020 11:02:40 +0300 Subject: [PATCH 20/92] Add tests in arcadia_skip_list.txt --- tests/queries/0_stateless/arcadia_skip_list.txt | 4 ++++ 1 file changed, 
4 insertions(+) diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index f5b81c08520..cc03dee9eb0 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -155,5 +155,9 @@ 01509_dictionary_preallocate 01526_max_untracked_memory 01530_drop_database_atomic_sync +01532_collate_in_low_cardinality +01533_collate_in_nullable +01542_collate_in_array +01543_collate_in_tuple 01547_query_log_current_database 01548_query_log_query_execution_ms From 97a6e3dde2bb0b99a1323e05370ae07d6fc3012c Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Sat, 24 Oct 2020 20:15:03 +0300 Subject: [PATCH 21/92] Skip collate test in fasttest --- docker/test/fasttest/run.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 8300c31681e..f11e51dee98 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -240,6 +240,8 @@ TESTS_TO_SKIP=( 01354_order_by_tuple_collate_const 01355_ilike 01411_bayesian_ab_testing + 01532_collate_in_low_cardinality + 01533_collate_in_nullable _orc_ arrow avro From 5c296365e2c808e4ab601c389f38c7485f072e0b Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 27 Oct 2020 14:12:48 +0300 Subject: [PATCH 22/92] Fix build error --- src/Columns/ColumnString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 27dd9e54685..3093ae10646 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -288,7 +288,7 @@ struct ColumnString::cmp { const ColumnString & parent; bool reverse; - cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + explicit cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( From 
4d399fff3ee8a0922ca3b33e80daf6fae5730d69 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 29 Oct 2020 14:24:01 +0300 Subject: [PATCH 23/92] Support collation for Array and Tuple --- docker/test/fasttest/run.sh | 2 + src/Columns/ColumnArray.cpp | 120 +++++++++++------- src/Columns/ColumnArray.h | 15 +++ src/Columns/ColumnLowCardinality.cpp | 33 ++--- src/Columns/ColumnLowCardinality.h | 19 ++- src/Columns/ColumnNullable.cpp | 30 +++-- src/Columns/ColumnNullable.h | 20 +-- src/Columns/ColumnString.cpp | 41 +++--- src/Columns/ColumnString.h | 20 +-- src/Columns/ColumnTuple.cpp | 89 ++++++++++--- src/Columns/ColumnTuple.h | 16 ++- src/Columns/IColumn.h | 23 +++- src/Core/SortCursor.h | 7 +- src/Interpreters/sortBlock.cpp | 56 ++------ ...01532_collate_in_low_cardinality.reference | 36 ++++++ .../01532_collate_in_low_cardinality.sql | 17 ++- .../01542_collate_in_array.reference | 50 ++++++++ .../0_stateless/01542_collate_in_array.sql | 34 +++++ .../01543_collate_in_tuple.reference | 60 +++++++++ .../0_stateless/01543_collate_in_tuple.sql | 34 +++++ 20 files changed, 524 insertions(+), 198 deletions(-) create mode 100644 tests/queries/0_stateless/01542_collate_in_array.reference create mode 100644 tests/queries/0_stateless/01542_collate_in_array.sql create mode 100644 tests/queries/0_stateless/01543_collate_in_tuple.reference create mode 100644 tests/queries/0_stateless/01543_collate_in_tuple.sql diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index f11e51dee98..ad25be9e2de 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -242,6 +242,8 @@ TESTS_TO_SKIP=( 01411_bayesian_ab_testing 01532_collate_in_low_cardinality 01533_collate_in_nullable + 01542_collate_in_array + 01543_collate_in_tuple _orc_ arrow avro diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index e4d17c586ac..c061dd50642 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -324,8 +324,7 @@ void 
ColumnArray::popBack(size_t n) offsets_data.resize_assume_reserved(offsets_data.size() - n); } - -int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator) const { const ColumnArray & rhs = assert_cast(rhs_); @@ -334,8 +333,15 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir size_t rhs_size = rhs.sizeAt(m); size_t min_size = std::min(lhs_size, rhs_size); for (size_t i = 0; i < min_size; ++i) - if (int res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint)) + { + int res; + if (collator) + res = getData().compareAtWithCollation(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint, *collator); + else + res = getData().compareAt(offsetAt(n) + i, rhs.offsetAt(m) + i, *rhs.data.get(), nan_direction_hint); + if (res) return res; + } return lhs_size < rhs_size ? 
-1 @@ -344,6 +350,16 @@ int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_dir : 1); } +int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs_, nan_direction_hint); +} + +int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator); +} + void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -352,27 +368,25 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -namespace -{ - template - struct Less +struct ColumnArray::Cmp { + const ColumnArray & parent; + int nan_direction_hint; + bool reverse; + const Collator * collator; + + Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) + : parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {} + + int operator()(size_t lhs, size_t rhs) const { - const ColumnArray & parent; - int nan_direction_hint; - - Less(const ColumnArray & parent_, int nan_direction_hint_) - : parent(parent_), nan_direction_hint(nan_direction_hint_) {} - - bool operator()(size_t lhs, size_t rhs) const - { - if (positive) - return parent.compareAt(lhs, rhs, parent, nan_direction_hint) < 0; - else - return parent.compareAt(lhs, rhs, parent, nan_direction_hint) > 0; - } - }; -} - + int res; + if (collator) + res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator); + else + res = parent.compareAt(lhs, rhs, parent, nan_direction_hint); + return reverse ? 
-res : res; + } +}; void ColumnArray::reserve(size_t n) { @@ -753,7 +767,8 @@ ColumnPtr ColumnArray::indexImpl(const PaddedPODArray & indexes, size_t limit INSTANTIATE_INDEX_IMPL(ColumnArray) -void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +template +void ColumnArray::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const { size_t s = size(); if (limit >= s) @@ -763,23 +778,16 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h for (size_t i = 0; i < s; ++i) res[i] = i; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; + if (limit) - { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(*this, nan_direction_hint)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(*this, nan_direction_hint)); - } + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); else - { - if (reverse) - std::sort(res.begin(), res.end(), Less(*this, nan_direction_hint)); - else - std::sort(res.begin(), res.end(), Less(*this, nan_direction_hint)); - } + std::sort(res.begin(), res.end(), less); } -void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +template +void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const { if (equal_range.empty()) return; @@ -792,20 +800,19 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio if (limit) --number_of_ranges; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; + EqualRanges new_ranges; for (size_t i = 0; i < number_of_ranges; ++i) { const auto & [first, last] = equal_range[i]; - if (reverse) - std::sort(res.begin() + first, res.begin() + last, Less(*this, nan_direction_hint)); - else - std::sort(res.begin() + first, res.begin() + last, 
Less(*this, nan_direction_hint)); + std::sort(res.begin() + first, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < last; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + if (cmp(res[new_first], res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -827,14 +834,11 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio /// Since then we are working inside the interval. - if (reverse) - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less(*this, nan_direction_hint)); - else - std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less(*this, nan_direction_hint)); + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less); auto new_first = first; for (auto j = first + 1; j < limit; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + if (cmp(res[new_first], res[j]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -845,7 +849,7 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio auto new_last = limit; for (auto j = limit; j < last; ++j) { - if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0) + if (cmp(res[new_first], res[j]) == 0) { std::swap(res[new_last], res[j]); ++new_last; @@ -859,6 +863,26 @@ void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_directio equal_range = std::move(new_ranges); } +void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse)); +} + +void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse)); +} + +void 
ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse, &collator)); +} + +void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse, &collator)); +} + ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const { if (replicate_offsets.empty()) diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index cec8387ab66..028eaba73c5 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -77,8 +77,11 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; @@ -132,6 +135,8 @@ public: return false; } + bool isCollationSupported() const override { return getData().isCollationSupported(); } + private: WrappedPtr data; 
WrappedPtr offsets; @@ -169,6 +174,16 @@ private: ColumnPtr filterTuple(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterNullable(const Filter & filt, ssize_t result_size_hint) const; ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const; + + int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator * collator=nullptr) const; + + template + void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const; + + struct Cmp; }; diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 2e941a3ef8a..37e97da88b9 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -15,7 +15,6 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int LOGICAL_ERROR; - extern const int BAD_COLLATION; } namespace @@ -280,14 +279,26 @@ MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const return ColumnLowCardinality::create(IColumn::mutate(std::move(unique_ptr)), getIndexes().cloneResized(size)); } -int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const { const auto & low_cardinality_column = assert_cast(rhs); size_t n_index = getIndexes().getUInt(n); size_t m_index = low_cardinality_column.getIndexes().getUInt(m); + if (collator) + return getDictionary().getNestedColumn()->compareAtWithCollation(n_index, m_index, *low_cardinality_column.getDictionary().getNestedColumn(), nan_direction_hint, *collator); return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint); } +int ColumnLowCardinality::compareAt(size_t n, size_t m, 
const IColumn & rhs, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint); +} + +int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); +} + void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -306,12 +317,7 @@ void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int na Permutation unique_perm; if (collator) { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(getDictionary().getNestedColumn().get()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - column_string->getPermutationWithCollation(*collator, reverse, unique_limit, unique_perm); + getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm); } else getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); @@ -438,16 +444,11 @@ void ColumnLowCardinality::getPermutationWithCollation(const Collator & collator getPermutationImpl(reverse, limit, nan_direction_hint, res, &collator); } -void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const +void ColumnLowCardinality::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const { - /// Collations are supported only for ColumnString - const ColumnString * column_string = 
checkAndGetColumn(getDictionary().getNestedColumn().get()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - auto comparator = [this, &column_string, &collator, reverse](size_t lhs, size_t rhs) + auto comparator = [this, &collator, reverse, nan_direction_hint](size_t lhs, size_t rhs) { - int ret = column_string->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *column_string, collator); + int ret = getDictionary().getNestedColumn()->compareAtWithCollation(getIndexes().getUInt(lhs), getIndexes().getUInt(rhs), *getDictionary().getNestedColumn(), nan_direction_hint, collator); return reverse ? -ret : ret; }; diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e45449873fc..0874f619b8a 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -31,11 +31,6 @@ class ColumnLowCardinality final : public COWHelper - void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; - public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. 
@@ -130,13 +125,15 @@ public: PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override; + void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; - void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_range) const override; ColumnPtr replicate(const Offsets & offsets) const override { @@ -179,6 +176,7 @@ public: size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } bool isNumeric() const override { return getDictionary().isNumeric(); } bool lowCardinality() const override { return true; } + bool isCollationSupported() const override { return getDictionary().getNestedColumn()->isCollationSupported(); } /** * Checks if the dictionary column is Nullable(T). 
@@ -318,6 +316,13 @@ private: void compactInplace(); void compactIfSharedDictionary(); + + int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; + + void getPermutationImpl(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; }; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 63b86f38342..cbb82264694 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -18,7 +18,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; extern const int SIZES_OF_NESTED_COLUMNS_ARE_INCONSISTENT; - extern const int BAD_COLLATION; } @@ -225,7 +224,7 @@ ColumnPtr ColumnNullable::index(const IColumn & indexes, size_t limit) const return ColumnNullable::create(indexed_data, indexed_null_map); } -int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator) const { /// NULL values share the properties of NaN values. 
/// Here the last parameter of compareAt is called null_direction_hint @@ -247,9 +246,22 @@ int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null } const IColumn & nested_rhs = nullable_rhs.getNestedColumn(); + if (collator) + return getNestedColumn().compareAtWithCollation(n, m, nested_rhs, null_direction_hint, *collator); + return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint); } +int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const +{ + return compareAtImpl(n, m, rhs_, null_direction_hint); +} + +int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs_, null_direction_hint, &collator); +} + void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -264,12 +276,7 @@ void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_dir if (collator) { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(&getNestedColumn()); - if (!column_string) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - column_string->getPermutationWithCollation(*collator, reverse, 0, res); + getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res); } else getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); @@ -447,12 +454,7 @@ void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_ if (collator) { - /// Collations are supported only for ColumnString - const ColumnString * column_string = checkAndGetColumn(getNestedColumn()); - if (!column_string) - throw Exception("Collations could be specified only 
for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); - - column_string->updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); + getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); } else getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 3d7a7970bd3..47b0103eab4 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -31,11 +31,6 @@ private: ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_); ColumnNullable(const ColumnNullable &) = default; - void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const; - - void updatePermutationImpl( - bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; - public: /** Create immutable column using immutable arguments. This arguments may be shared with other columns. * Use IColumn::mutate in order to make mutable column and mutate shared nested columns. 
@@ -98,11 +93,12 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; - void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; - void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const; + void updatePermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_range) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; void updatePermutationWithCollation( - const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const; + const Collator & collator, bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; @@ -138,6 +134,7 @@ public: bool valuesHaveFixedSize() const override { return nested_column->valuesHaveFixedSize(); } size_t sizeOfValueIfFixed() const override { return null_map->sizeOfValueIfFixed() + nested_column->sizeOfValueIfFixed(); } bool onlyNull() const override { return nested_column->isDummy(); } + bool isCollationSupported() const override { return nested_column->isCollationSupported(); } /// Return the column that represents values. 
@@ -173,6 +170,13 @@ private: template void applyNullMapImpl(const ColumnUInt8 & map); + + int compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint, const Collator * collator=nullptr) const; + + void getPermutationImpl(bool reverse, size_t limit, int null_direction_hint, Permutation & res, const Collator * collator = nullptr) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int null_direction_hint, Permutation & res, EqualRanges & equal_ranges, const Collator * collator = nullptr) const; }; ColumnPtr makeNullable(const ColumnPtr & column); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 3093ae10646..9ea12041d85 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -284,11 +284,11 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } -struct ColumnString::cmp +struct ColumnString::Cmp { const ColumnString & parent; bool reverse; - explicit cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + explicit Cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( @@ -299,8 +299,8 @@ struct ColumnString::cmp } }; -template -void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const +template +void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const { size_t s = offsets.size(); res.resize(s); @@ -310,7 +310,7 @@ void ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp compa if (limit >= s) limit = 0; - auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; if (limit) std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); @@ -318,8 +318,8 @@ void 
ColumnString::getPermutationImpl(size_t limit, Permutation & res, Cmp compa std::sort(res.begin(), res.end(), less); } -template -void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const +template +void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const { if (equal_ranges.empty()) return; @@ -334,7 +334,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR if (limit) --number_of_ranges; - auto less = [&comparator](size_t lhs, size_t rhs){ return comparator(lhs, rhs) < 0; }; + auto less = [&cmp](size_t lhs, size_t rhs){ return cmp(lhs, rhs) < 0; }; for (size_t i = 0; i < number_of_ranges; ++i) { @@ -344,7 +344,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR size_t new_first = first; for (size_t j = first + 1; j < last; ++j) { - if (comparator(res[j], res[new_first]) != 0) + if (cmp(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -370,7 +370,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR size_t new_first = first; for (size_t j = first + 1; j < limit; ++j) { - if (comparator(res[j], res[new_first]) != 0) + if (cmp(res[j], res[new_first]) != 0) { if (j - new_first > 1) new_ranges.emplace_back(new_first, j); @@ -380,7 +380,7 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR size_t new_last = limit; for (size_t j = limit; j < last; ++j) { - if (comparator(res[j], res[new_first]) == 0) + if (cmp(res[j], res[new_first]) == 0) { std::swap(res[j], res[new_last]); ++new_last; @@ -393,21 +393,21 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const { - getPermutationImpl(limit, res, cmp(*this, reverse)); + 
getPermutationImpl(limit, res, Cmp(*this, reverse)); } void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, cmp(*this, reverse)); + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this, reverse)); } -struct ColumnString::cmpWithCollation +struct ColumnString::CmpWithCollation { const ColumnString & parent; const Collator & collator; bool reverse; - cmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} + CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { @@ -419,17 +419,16 @@ struct ColumnString::cmpWithCollation } }; -void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const +void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const { - getPermutationImpl(limit, res, cmpWithCollation(*this, collator, reverse)); + getPermutationImpl(limit, res, CmpWithCollation(*this, collator, reverse)); } void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, cmpWithCollation(*this, collator, reverse)); + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator, reverse)); } - ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const { size_t col_size = size(); @@ -498,7 +497,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - cmp cmp_op(*this); + Cmp cmp_op(*this); for (size_t i = 1; i < 
col_size; ++i) { @@ -513,7 +512,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const } -int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const +int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const { const ColumnString & rhs = assert_cast(rhs_); diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index c91d982f126..1e6f60e63b3 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -42,18 +42,18 @@ private: /// Size of i-th element, including terminating zero. size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } - struct cmp; + struct Cmp; - struct cmpWithCollation; + struct CmpWithCollation; ColumnString() = default; ColumnString(const ColumnString & src); - template - void getPermutationImpl(size_t limit, Permutation & res, Cmp comparator) const; + template + void getPermutationImpl(size_t limit, Permutation & res, Comparator cmp) const; - template - void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Cmp comparator) const; + template + void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Comparator cmp) const; public: const char * getFamilyName() const override { return "String"; } @@ -233,16 +233,16 @@ public: int direction, int nan_direction_hint) const override; /// Variant of compareAt for string comparison with respect of collation. 
- int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, const Collator & collator) const; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; /// Sorting with respect of collation. - void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const override; - void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override; ColumnPtr replicate(const Offsets & replicate_offsets) const override; @@ -274,6 +274,8 @@ public: // Throws an exception if offsets/chars are messed up void validate() const; + + bool isCollationSupported() const override { return true; } }; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 98a6611edb7..f588762fb67 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -275,16 +275,27 @@ MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & se return res; } -int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator) const { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) - if (int res = columns[i]->compareAt(n, m, *assert_cast(rhs).columns[i], nan_direction_hint)) + { + 
int res; + if (collator && columns[i]->isCollationSupported()) + res = columns[i]->compareAtWithCollation(n, m, *assert_cast(rhs).columns[i], nan_direction_hint, *collator); + else + res = columns[i]->compareAt(n, m, *assert_cast(rhs).columns[i], nan_direction_hint); + if (res) return res; - + } return 0; } +int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint); +} + void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const @@ -293,14 +304,20 @@ void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -template +int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const +{ + return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); +} + struct ColumnTuple::Less { TupleColumns columns; int nan_direction_hint; + bool reverse; + const Collator * collator; - Less(const TupleColumns & columns_, int nan_direction_hint_) - : columns(columns_), nan_direction_hint(nan_direction_hint_) + Less(const TupleColumns & columns_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) + : columns(columns_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) { } @@ -308,17 +325,22 @@ struct ColumnTuple::Less { for (const auto & column : columns) { - int res = column->compareAt(a, b, *column, nan_direction_hint); + int res; + if (collator && column->isCollationSupported()) + res = column->compareAtWithCollation(a, b, *column, nan_direction_hint, *collator); + else + res = column->compareAt(a, b, *column, nan_direction_hint); if (res < 0) - return positive; + return !reverse; else if (res > 0) - return !positive; + return reverse; } return false; } }; -void 
ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +template +void ColumnTuple::getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const { size_t rows = size(); res.resize(rows); @@ -330,28 +352,25 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h if (limit) { - if (reverse) - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(columns, nan_direction_hint)); - else - std::partial_sort(res.begin(), res.begin() + limit, res.end(), Less(columns, nan_direction_hint)); + std::partial_sort(res.begin(), res.begin() + limit, res.end(), less); } else { - if (reverse) - std::sort(res.begin(), res.end(), Less(columns, nan_direction_hint)); - else - std::sort(res.begin(), res.end(), Less(columns, nan_direction_hint)); + std::sort(res.begin(), res.end(), less); } } -void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator) const { if (equal_ranges.empty()) return; for (const auto & column : columns) { - column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); + if (collator && column->isCollationSupported()) + column->updatePermutationWithCollation(*collator, reverse, limit, nan_direction_hint, res, equal_ranges); + else + column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_ranges); while (limit && !equal_ranges.empty() && limit <= equal_ranges.back().first) equal_ranges.pop_back(); @@ -361,6 +380,26 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio } } +void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Less(columns, 
nan_direction_hint, reverse)); +} + +void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges); +} + +void ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const +{ + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse, &collator)); +} + +void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const +{ + updatePermutationImpl(reverse, limit, nan_direction_hint, res, equal_ranges, &collator); +} + void ColumnTuple::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); @@ -433,5 +472,15 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const return false; } +bool ColumnTuple::isCollationSupported() const +{ + for (const auto& column : columns) + { + if (column->isCollationSupported()) + return true; + } + return false; +} + } diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index e8dfd4c8e44..c34768d85a4 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -20,7 +20,6 @@ private: using TupleColumns = std::vector; TupleColumns columns; - template struct Less; explicit ColumnTuple(MutableColumns && columns); @@ -75,15 +74,19 @@ public: void compareColumn(const IColumn & rhs, size_t rhs_row_num, PaddedPODArray * row_indexes, PaddedPODArray & compare_results, int direction, int nan_direction_hint) const override; + int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; - void 
updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const override; + void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; void protect() override; void forEachSubcolumn(ColumnCallback callback) override; bool structureEquals(const IColumn & rhs) const override; + bool isCollationSupported() const override; size_t tupleSize() const { return columns.size(); } @@ -94,6 +97,15 @@ public: Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; } const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } + +private: + int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const; + + template + void getPermutationImpl(size_t limit, Permutation & res, LessOperator less) const; + + void updatePermutationImpl( + bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges, const Collator * collator=nullptr) const; }; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 14e6a9d7eed..6dbcfacefe9 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -9,7 +9,7 @@ class SipHash; - +class Collator; namespace DB { @@ -18,6 +18,7 @@ namespace ErrorCodes { extern const int CANNOT_GET_SIZE_OF_FIELD; extern const int NOT_IMPLEMENTED; + extern const int BAD_COLLATION; } class Arena; @@ -250,6 +251,12 @@ public: */ virtual int 
compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0; + /// Equivalent to compareAt, but collator is used to compare values. + virtual int compareAtWithCollation(size_t, size_t, const IColumn &, int, const Collator &) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing it.", ErrorCodes::BAD_COLLATION); + } + /// Compare the whole column with single value from rhs column. /// If row_indexes is nullptr, it's ignored. Otherwise, it is a set of rows to compare. /// compare_results[i] will be equal to compareAt(row_indexes[i], rhs_row_num, rhs, nan_direction_hint) * direction @@ -277,6 +284,18 @@ public: */ virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0; + /** Equivalent to getPermutation and updatePermutation but collator is used to compare values. + * Supported for String, LowCardinality(String), Nullable(String) and for Array and Tuple, containing them. + */ + virtual void getPermutationWithCollation(const Collator &, bool, size_t, int, Permutation &) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + virtual void updatePermutationWithCollation(const Collator &, bool, size_t, int, Permutation &, EqualRanges&) const + { + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); + } + /** Copies each element according offsets parameter. * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. 
@@ -402,6 +421,8 @@ public: virtual bool lowCardinality() const { return false; } + virtual bool isCollationSupported() const { return false; } + virtual ~IColumn() = default; IColumn() = default; IColumn(const IColumn &) = default; diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index 4c90cc723bf..7a222f70199 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -96,7 +96,7 @@ struct SortCursorImpl : column_desc.column_number; sort_columns.push_back(columns[column_number].get()); - need_collation[j] = desc[j].collator != nullptr && typeid_cast(sort_columns.back()); /// TODO Nullable(String) + need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported(); /// TODO Nullable(String) has_collation |= need_collation[j]; } @@ -201,10 +201,7 @@ struct SortCursorWithCollation : SortCursorHelper int nulls_direction = desc.nulls_direction; int res; if (impl->need_collation[i]) - { - const ColumnString & column_string = assert_cast(*impl->sort_columns[i]); - res = column_string.compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), *impl->desc[i].collator); - } + res = impl->sort_columns[i]->compareAtWithCollation(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction, *impl->desc[i].collator); else res = impl->sort_columns[i]->compareAt(lhs_pos, rhs_pos, *(rhs.impl->sort_columns[i]), nulls_direction); diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 5d114c746e5..edf911fa61c 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -22,24 +22,6 @@ static bool isCollationRequired(const SortColumnDescription & description) return description.collator != nullptr; } -static bool isCollationSupported(const IColumn * column) -{ - if (column->getDataType() == TypeIndex::String) - return true; - - if (column->getDataType() == TypeIndex::Nullable) - { - const ColumnNullable * column_nullable = assert_cast(column); - return 
isCollationSupported(&column_nullable->getNestedColumn()); - } - - if (column->getDataType() == TypeIndex::LowCardinality) - { - const ColumnLowCardinality * column_low_cardinality = assert_cast(column); - return isCollationSupported(column_low_cardinality->getDictionary().getNestedColumn().get()); - } - return false; -} ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description) { @@ -106,8 +88,7 @@ struct PartialSortingLessWithCollation } else if (isCollationRequired(elem.description)) { - const ColumnString & column_string = assert_cast(*elem.column); - res = column_string.compareAtWithCollation(a, b, *elem.column, *elem.description.collator); + res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator); } else res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); @@ -139,18 +120,13 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) bool is_column_const = false; if (isCollationRequired(description[0])) { - /// Check if column supports collations - if (!isCollationSupported(column)) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + if (!column->isCollationSupported()) + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); - if (const ColumnString * column_string = checkAndGetColumn(column)) - column_string->getPermutationWithCollation(*description[0].collator, reverse, limit, perm); - else if (const ColumnNullable * column_nullable = checkAndGetColumn(column)) - column_nullable->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); - else if (const ColumnLowCardinality * column_low_cardinality = 
checkAndGetColumn(column)) - column_low_cardinality->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); - else if (isColumnConst(*column)) + if (isColumnConst(*column)) is_column_const = true; + else + column->getPermutationWithCollation(*description[0].collator, reverse, limit, description[0].nulls_direction, perm); } else if (!isColumnConst(*column)) { @@ -186,8 +162,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) const IColumn * column = columns_with_sort_desc[i].column; if (isCollationRequired(description[i])) { - if (!isCollationSupported(column)) - throw Exception("Collations could be specified only for String columns or columns where nested column is String.", ErrorCodes::BAD_COLLATION); + if (!column->isCollationSupported()) + throw Exception("Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, containing them.", ErrorCodes::BAD_COLLATION); need_collation = true; } @@ -210,20 +186,8 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (isCollationRequired(column.description)) { - if (const ColumnString * column_string = checkAndGetColumn(column.column)) - column_string->updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); - - else if (const ColumnNullable * column_nullable = checkAndGetColumn(column.column)) - column_nullable->updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); - - else if (const ColumnLowCardinality * column_low_cardinality = checkAndGetColumn(column.column)) - column_low_cardinality->updatePermutationWithCollation( - *column.description.collator, - column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + 
column.column->updatePermutationWithCollation( + *column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); } else { diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference index b7a4830f9cf..fbffea8df5a 100644 --- a/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.reference @@ -26,3 +26,39 @@ Order by tuple with collate 2 А 2 я 2 Я +Order by without collate +1 Ё +2 А +2 Я +1 а +2 я +1 ё +1 \N +2 \N +Order by with collate +1 а +2 А +1 ё +1 Ё +2 я +2 Я +1 \N +2 \N +Order by tuple without collate +1 Ё +1 а +1 ё +1 \N +2 А +2 Я +2 я +2 \N +Order by tuple with collate +1 а +1 ё +1 Ё +1 \N +2 А +2 я +2 Я +2 \N diff --git a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql index 0f4194ee671..b6fba26eb2d 100644 --- a/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql +++ b/tests/queries/0_stateless/01532_collate_in_low_cardinality.sql @@ -1,8 +1,12 @@ DROP TABLE IF EXISTS test_collate; +DROP TABLE IF EXISTS test_collate_null; CREATE TABLE test_collate (x UInt32, s LowCardinality(String)) ENGINE=Memory(); +CREATE TABLE test_collate_null (x UInt32, s LowCardinality(Nullable(String))) ENGINE=Memory(); INSERT INTO test_collate VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'); +INSERT INTO test_collate_null VALUES (1, 'Ё'), (1, 'ё'), (1, 'а'), (2, 'А'), (2, 'я'), (2, 'Я'), (1, null), (2, null); + SELECT 'Order by without collate'; SELECT * FROM test_collate ORDER BY s; @@ -14,5 +18,16 @@ SELECT * FROM test_collate ORDER BY x, s; SELECT 'Order by tuple with collate'; SELECT * FROM test_collate ORDER BY x, s COLLATE 'ru'; -DROP TABLE test_collate; +SELECT 'Order by without collate'; +SELECT * FROM test_collate_null ORDER BY s; 
+SELECT 'Order by with collate'; +SELECT * FROM test_collate_null ORDER BY s COLLATE 'ru'; +SELECT 'Order by tuple without collate'; +SELECT * FROM test_collate_null ORDER BY x, s; +SELECT 'Order by tuple with collate'; +SELECT * FROM test_collate_null ORDER BY x, s COLLATE 'ru'; + + +DROP TABLE test_collate; +DROP TABLE test_collate_null; diff --git a/tests/queries/0_stateless/01542_collate_in_array.reference b/tests/queries/0_stateless/01542_collate_in_array.reference new file mode 100644 index 00000000000..2c5a23066f3 --- /dev/null +++ b/tests/queries/0_stateless/01542_collate_in_array.reference @@ -0,0 +1,50 @@ +1 ['а'] +2 ['А'] +1 ['ё'] +1 ['ё','а'] +2 ['ё','а','а'] +1 ['ё','я'] +1 ['Ё'] +2 ['я','а'] +2 ['Я'] + +1 ['а'] +1 ['ё'] +1 ['ё','а'] +1 ['ё','я'] +1 ['Ё'] +2 ['А'] +2 ['ё','а','а'] +2 ['я','а'] +2 ['Я'] + +1 ['а'] +2 ['А'] +1 ['ё'] +1 ['ё','а'] +2 ['ё','а','а',NULL] +1 ['ё',NULL,'я'] +1 ['Ё'] +2 ['я'] +2 [NULL,'Я'] + +1 ['а'] +1 ['ё'] +1 ['ё','а'] +1 ['ё',NULL,'я'] +1 ['Ё'] +2 ['А'] +2 ['ё','а','а',NULL] +2 ['я'] +2 [NULL,'Я'] + +2 [['а','а'],['я','ё']] +1 [['а','Ё'],['ё','я']] +1 [['а','я'],['а','ё']] +2 [['ё']] + +1 [['а','Ё'],['ё','я']] +1 [['а','я'],['а','ё']] +2 [['а','а'],['я','ё']] +2 [['ё']] + diff --git a/tests/queries/0_stateless/01542_collate_in_array.sql b/tests/queries/0_stateless/01542_collate_in_array.sql new file mode 100644 index 00000000000..dd0ec769e7d --- /dev/null +++ b/tests/queries/0_stateless/01542_collate_in_array.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS collate_test1; +DROP TABLE IF EXISTS collate_test2; +DROP TABLE IF EXISTS collate_test3; + +CREATE TABLE collate_test1 (x UInt32, s Array(String)) ENGINE=Memory(); +CREATE TABLE collate_test2 (x UInt32, s Array(LowCardinality(Nullable(String)))) ENGINE=Memory(); +CREATE TABLE collate_test3 (x UInt32, s Array(Array(String))) ENGINE=Memory(); + +INSERT INTO collate_test1 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я', 'а']), (2, ['Я']), (1, ['ё','а']), (1, ['ё', 
'я']), (2, ['ё', 'а', 'а']); +INSERT INTO collate_test2 VALUES (1, ['Ё']), (1, ['ё']), (1, ['а']), (2, ['А']), (2, ['я']), (2, [null, 'Я']), (1, ['ё','а']), (1, ['ё', null, 'я']), (2, ['ё', 'а', 'а', null]); +INSERT INTO collate_test3 VALUES (1, [['а', 'я'], ['а', 'ё']]), (1, [['а', 'Ё'], ['ё', 'я']]), (2, [['ё']]), (2, [['а', 'а'], ['я', 'ё']]); + +SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +DROP TABLE collate_test1; +DROP TABLE collate_test2; +DROP TABLE collate_test3; + diff --git a/tests/queries/0_stateless/01543_collate_in_tuple.reference b/tests/queries/0_stateless/01543_collate_in_tuple.reference new file mode 100644 index 00000000000..fe8f935f0a6 --- /dev/null +++ b/tests/queries/0_stateless/01543_collate_in_tuple.reference @@ -0,0 +1,60 @@ +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +2 (1,'я') +1 (2,'а') +2 (2,'А') +2 (2,'Я') +1 (3,'я') + +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +1 (2,'а') +1 (3,'я') +2 (1,'я') +2 (2,'А') +2 (2,'Я') + +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +2 (1,'я') +1 (1,NULL) +2 (2,'А') +2 (2,'Я') +1 (2,NULL) +2 (2,NULL) +1 (3,'я') + +1 (1,'а') +1 (1,'ё') +1 (1,'Ё') +1 (1,NULL) +1 (2,NULL) +1 (3,'я') +2 (1,'я') +2 (2,'А') +2 (2,'Я') +2 (2,NULL) + +2 (1,(1,['А'])) +2 (1,(1,['ё','а','а'])) +1 (1,(1,['Ё'])) +2 (1,(1,['Я'])) +1 (1,(2,['а'])) +1 (1,(2,['ё','я'])) +1 (2,(1,['ё'])) +1 (2,(1,['ё','а'])) +2 (2,(1,['я'])) + +1 (1,(1,['Ё'])) +1 (1,(2,['а'])) +1 (1,(2,['ё','я'])) +1 (2,(1,['ё'])) +1 (2,(1,['ё','а'])) +2 (1,(1,['А'])) +2 (1,(1,['ё','а','а'])) +2 (1,(1,['Я'])) +2 (2,(1,['я'])) + diff --git a/tests/queries/0_stateless/01543_collate_in_tuple.sql 
b/tests/queries/0_stateless/01543_collate_in_tuple.sql new file mode 100644 index 00000000000..17d9426cf45 --- /dev/null +++ b/tests/queries/0_stateless/01543_collate_in_tuple.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS collate_test1; +DROP TABLE IF EXISTS collate_test2; +DROP TABLE IF EXISTS collate_test3; + +CREATE TABLE collate_test1 (x UInt32, s Tuple(UInt32, String)) ENGINE=Memory(); +CREATE TABLE collate_test2 (x UInt32, s Tuple(UInt32, LowCardinality(Nullable(String)))) ENGINE=Memory(); +CREATE TABLE collate_test3 (x UInt32, s Tuple(UInt32, Tuple(UInt32, Array(String)))) ENGINE=Memory(); + +INSERT INTO collate_test1 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2,'а')), (1, (3, 'я')); +INSERT INTO collate_test2 VALUES (1, (1, 'Ё')), (1, (1, 'ё')), (1, (1, 'а')), (2, (2, 'А')), (2, (1, 'я')), (2, (2, 'Я')), (1, (2, null)), (1, (3, 'я')), (1, (1, null)), (2, (2, null)); +INSERT INTO collate_test3 VALUES (1, (1, (1, ['Ё']))), (1, (2, (1, ['ё']))), (1, (1, (2, ['а']))), (2, (1, (1, ['А']))), (2, (2, (1, ['я']))), (2, (1, (1, ['Я']))), (1, (2, (1, ['ё','а']))), (1, (1, (2, ['ё', 'я']))), (2, (1, (1, ['ё', 'а', 'а']))); + +SELECT * FROM collate_test1 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test1 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test2 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY s COLLATE 'ru'; +SELECT ''; + +SELECT * FROM collate_test3 ORDER BY x, s COLLATE 'ru'; +SELECT ''; + +DROP TABLE collate_test1; +DROP TABLE collate_test2; +DROP TABLE collate_test3; + From cf3f39ed29bf5a127c1dcc5c97ded68fb75befc8 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Thu, 29 Oct 2020 14:37:00 +0300 Subject: [PATCH 24/92] Small changes --- src/Columns/ColumnLowCardinality.cpp | 2 -- src/Columns/ColumnNullable.cpp | 4 ---- 2 files changed, 6 deletions(-) diff --git 
a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 37e97da88b9..3f03734b738 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -316,9 +316,7 @@ void ColumnLowCardinality::getPermutationImpl(bool reverse, size_t limit, int na size_t unique_limit = getDictionary().size(); Permutation unique_perm; if (collator) - { getDictionary().getNestedColumn()->getPermutationWithCollation(*collator, reverse, unique_limit, nan_direction_hint, unique_perm); - } else getDictionary().getNestedColumn()->getPermutation(reverse, unique_limit, nan_direction_hint, unique_perm); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index cbb82264694..4f2117b1405 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -275,9 +275,7 @@ void ColumnNullable::getPermutationImpl(bool reverse, size_t limit, int null_dir /// Cannot pass limit because of unknown amount of NULLs. if (collator) - { getNestedColumn().getPermutationWithCollation(*collator, reverse, 0, null_direction_hint, res); - } else getNestedColumn().getPermutation(reverse, 0, null_direction_hint, res); @@ -453,9 +451,7 @@ void ColumnNullable::updatePermutationImpl(bool reverse, size_t limit, int null_ } if (collator) - { getNestedColumn().updatePermutationWithCollation(*collator, reverse, limit, null_direction_hint, res, new_ranges); - } else getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, new_ranges); From bcd660bb57862b2aae0572518c1ecde2be59c21b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Oct 2020 08:35:18 +0300 Subject: [PATCH 25/92] Minor fixes --- src/Columns/ColumnArray.cpp | 5 +++-- src/Columns/ColumnConst.h | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index c061dd50642..b420d337701 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -368,13 
+368,14 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -struct ColumnArray::Cmp { +struct ColumnArray::Cmp +{ const ColumnArray & parent; int nan_direction_hint; bool reverse; const Collator * collator; - Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) + Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_ = false, const Collator * collator_ = nullptr) : parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {} int operator()(size_t lhs, size_t rhs) const diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 4942d27b6c9..d7a8842bf01 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -248,6 +248,8 @@ public: /// The constant value. It is valid even if the size of the column is 0. template T getValue() const { return getField().safeGet>(); } + + bool isCollationSupported() const override { return true; } }; } From cd86f98aec6ff47185d6972d3509e508db4fdd3c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 30 Oct 2020 08:36:27 +0300 Subject: [PATCH 26/92] Minor fixes --- src/Columns/ColumnConst.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index d7a8842bf01..3680926cd9b 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -249,7 +249,7 @@ public: template T getValue() const { return getField().safeGet>(); } - bool isCollationSupported() const override { return true; } + bool isCollationSupported() const override { return data->isCollationSupported(); } }; } From 9868b58531b7ce726e11e71a0b0b068cbb73cd06 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 31 Oct 2020 03:59:58 +0300 Subject: [PATCH 27/92] Minor change --- src/Columns/ColumnString.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 9ea12041d85..477c098f067 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -407,7 +407,8 @@ struct ColumnString::CmpWithCollation const Collator & collator; bool reverse; - CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_=false) : parent(parent_), collator(collator_), reverse(reverse_) {} + CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_ = false) + : parent(parent_), collator(collator_), reverse(reverse_) {} int operator()(size_t lhs, size_t rhs) const { From 822bbcfdba6544dbcddd2f6985b92a3827504862 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 3 Nov 2020 17:25:52 +0300 Subject: [PATCH 28/92] Make Cmp with template --- src/Columns/ColumnArray.cpp | 32 ++++++++++++++++++++++---------- src/Columns/ColumnArray.h | 1 + src/Columns/ColumnString.cpp | 35 +++++++++++++++++++++++------------ src/Columns/ColumnString.h | 2 ++ src/Columns/ColumnTuple.cpp | 20 +++++++++++++------- src/Columns/ColumnTuple.h | 1 + 6 files changed, 62 insertions(+), 29 deletions(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index b420d337701..f03a51e0681 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -368,15 +368,14 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, compare_results, direction, nan_direction_hint); } -struct ColumnArray::Cmp -{ +template +struct ColumnArray::Cmp { const ColumnArray & parent; int nan_direction_hint; - bool reverse; const Collator * collator; - Cmp(const ColumnArray & parent_, int nan_direction_hint_, bool reverse_ = false, const Collator * collator_ = nullptr) - : parent(parent_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) {} + Cmp(const ColumnArray & parent_, int nan_direction_hint_, const Collator * collator_=nullptr) + : parent(parent_), 
nan_direction_hint(nan_direction_hint_), collator(collator_) {} int operator()(size_t lhs, size_t rhs) const { @@ -385,7 +384,7 @@ struct ColumnArray::Cmp res = parent.compareAtWithCollation(lhs, rhs, parent, nan_direction_hint, *collator); else res = parent.compareAt(lhs, rhs, parent, nan_direction_hint); - return reverse ? -res : res; + return positive ? res : -res; } }; @@ -866,22 +865,35 @@ void ColumnArray::updatePermutationImpl(size_t limit, Permutation & res, EqualRa void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse)); + if (reverse) + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint)); + else + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint)); + } void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const { - updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse)); + if (reverse) + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint)); + else + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint)); } void ColumnArray::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, reverse, &collator)); + if (reverse) + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, &collator)); + else + getPermutationImpl(limit, res, Cmp(*this, nan_direction_hint, &collator)); } void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const { - updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, reverse, &collator)); + if (reverse) + updatePermutationImpl(limit, res, 
equal_range, Cmp(*this, nan_direction_hint, &collator)); + else + updatePermutationImpl(limit, res, equal_range, Cmp(*this, nan_direction_hint, &collator)); } ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 028eaba73c5..8a02af92dce 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -183,6 +183,7 @@ private: template void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const; + template struct Cmp; }; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 477c098f067..23798f64a9c 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -284,18 +284,18 @@ void ColumnString::compareColumn( compare_results, direction, nan_direction_hint); } +template struct ColumnString::Cmp { const ColumnString & parent; - bool reverse; - explicit Cmp(const ColumnString & parent_, bool reverse_=false) : parent(parent_), reverse(reverse_) {} + explicit Cmp(const ColumnString & parent_) : parent(parent_) {} int operator()(size_t lhs, size_t rhs) const { int res = memcmpSmallAllowOverflow15( parent.chars.data() + parent.offsetAt(lhs), parent.sizeAt(lhs) - 1, parent.chars.data() + parent.offsetAt(rhs), parent.sizeAt(rhs) - 1); - return reverse ? -res : res; + return positive ? 
res : -res; } }; @@ -393,22 +393,27 @@ void ColumnString::updatePermutationImpl(size_t limit, Permutation & res, EqualR void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res) const { - getPermutationImpl(limit, res, Cmp(*this, reverse)); + if (reverse) + getPermutationImpl(limit, res, Cmp(*this)); + else + getPermutationImpl(limit, res, Cmp(*this)); } void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, Cmp(*this, reverse)); + if (reverse) + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this)); + else + updatePermutationImpl(limit, res, equal_ranges, Cmp(*this)); } +template struct ColumnString::CmpWithCollation { const ColumnString & parent; const Collator & collator; - bool reverse; - CmpWithCollation(const ColumnString & parent_, const Collator & collator_, bool reverse_ = false) - : parent(parent_), collator(collator_), reverse(reverse_) {} + CmpWithCollation(const ColumnString & parent_, const Collator & collator_) : parent(parent_), collator(collator_) {} int operator()(size_t lhs, size_t rhs) const { @@ -416,18 +421,24 @@ struct ColumnString::CmpWithCollation reinterpret_cast(&parent.chars[parent.offsetAt(lhs)]), parent.sizeAt(lhs), reinterpret_cast(&parent.chars[parent.offsetAt(rhs)]), parent.sizeAt(rhs)); - return reverse ? -res : res; + return positive ? 
res : -res; } }; void ColumnString::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res) const { - getPermutationImpl(limit, res, CmpWithCollation(*this, collator, reverse)); + if (reverse) + getPermutationImpl(limit, res, CmpWithCollation(*this, collator)); + else + getPermutationImpl(limit, res, CmpWithCollation(*this, collator)); } void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const { - updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator, reverse)); + if (reverse) + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator)); + else + updatePermutationImpl(limit, res, equal_ranges, CmpWithCollation(*this, collator)); } ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const @@ -498,7 +509,7 @@ void ColumnString::getExtremes(Field & min, Field & max) const size_t min_idx = 0; size_t max_idx = 0; - Cmp cmp_op(*this); + Cmp cmp_op(*this); for (size_t i = 1; i < col_size; ++i) { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 1e6f60e63b3..b71751dbc4e 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -42,8 +42,10 @@ private: /// Size of i-th element, including terminating zero. 
size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return offsets[i] - offsets[i - 1]; } + template struct Cmp; + template struct CmpWithCollation; ColumnString() = default; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index f588762fb67..d6e1ca982d6 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -309,15 +309,15 @@ int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, return compareAtImpl(n, m, rhs, nan_direction_hint, &collator); } +template struct ColumnTuple::Less { TupleColumns columns; int nan_direction_hint; - bool reverse; const Collator * collator; - Less(const TupleColumns & columns_, int nan_direction_hint_, bool reverse_=false, const Collator * collator_=nullptr) - : columns(columns_), nan_direction_hint(nan_direction_hint_), reverse(reverse_), collator(collator_) + Less(const TupleColumns & columns_, int nan_direction_hint_, const Collator * collator_=nullptr) + : columns(columns_), nan_direction_hint(nan_direction_hint_), collator(collator_) { } @@ -331,9 +331,9 @@ struct ColumnTuple::Less else res = column->compareAt(a, b, *column, nan_direction_hint); if (res < 0) - return !reverse; + return positive; else if (res > 0) - return reverse; + return !positive; } return false; } @@ -382,7 +382,10 @@ void ColumnTuple::updatePermutationImpl(bool reverse, size_t limit, int nan_dire void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse)); + if (reverse) + getPermutationImpl(limit, res, Less(columns, nan_direction_hint)); + else + getPermutationImpl(limit, res, Less(columns, nan_direction_hint)); } void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_ranges) const @@ -392,7 +395,10 @@ void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_directio void 
ColumnTuple::getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const { - getPermutationImpl(limit, res, Less(columns, nan_direction_hint, reverse, &collator)); + if (reverse) + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, &collator)); + else + getPermutationImpl(limit, res, Less(columns, nan_direction_hint, &collator)); } void ColumnTuple::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index c34768d85a4..0bee3463f2f 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -20,6 +20,7 @@ private: using TupleColumns = std::vector; TupleColumns columns; + template struct Less; explicit ColumnTuple(MutableColumns && columns); From b67465b010048d2c46cb0cf19e16aae9b13035fc Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 3 Nov 2020 18:00:51 +0300 Subject: [PATCH 29/92] Fix style --- src/Columns/ColumnArray.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index f03a51e0681..9b948236943 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -369,7 +369,8 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num, } template -struct ColumnArray::Cmp { +struct ColumnArray::Cmp +{ const ColumnArray & parent; int nan_direction_hint; const Collator * collator; From b161127fc1223c40d5225f7deac594dc4bc95179 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 3 Nov 2020 21:19:50 +0300 Subject: [PATCH 30/92] Add a test for spreading parts between threads for ReadInOrderOptimizer --- ...1551_mergetree_read_in_order_spread.reference | 11 +++++++++++ .../01551_mergetree_read_in_order_spread.sql | 16 ++++++++++++++++ 2 files changed, 27 insertions(+) create mode 
100644 tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference create mode 100644 tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference new file mode 100644 index 00000000000..fc10b4707a9 --- /dev/null +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.reference @@ -0,0 +1,11 @@ +(Expression) +ExpressionTransform + (Expression) + ExpressionTransform + (Aggregating) + FinalizingSimpleTransform + AggregatingSortedTransform 3 → 1 + AggregatingInOrderTransform × 3 + (Expression) + ExpressionTransform × 3 + (ReadFromStorage) diff --git a/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql new file mode 100644 index 00000000000..831a7282861 --- /dev/null +++ b/tests/queries/0_stateless/01551_mergetree_read_in_order_spread.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS data_01551; + +CREATE TABLE data_01551 +( + key UInt32 +) engine=AggregatingMergeTree() +PARTITION BY key%2 +ORDER BY (key, key/2) +SETTINGS index_granularity=10; + +INSERT INTO data_01551 SELECT number FROM numbers(100000); +SET max_threads=3; +SET merge_tree_min_rows_for_concurrent_read=10000; +SET optimize_aggregation_in_order=1; +SET read_in_order_two_level_merge_threshold=1; +EXPLAIN PIPELINE SELECT key FROM data_01551 GROUP BY key, key/2; From 2389406c21848d07da7f2fc670a24612c018f6e4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 3 Nov 2020 21:19:50 +0300 Subject: [PATCH 31/92] Fix spreading for ReadInOrderOptimizer with expression in ORDER BY This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 56 ++++++++++++++----- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git 
a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index f06bfb97b2c..a38d50e56fb 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -82,6 +82,17 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts return Block{ColumnWithTypeAndName(std::move(column), std::make_shared(), "_part")}; } +/// Check if ORDER BY clause of the query has some expression. +static bool sortingDescriptionHasExpressions(const SortDescription & sort_description, const StorageMetadataPtr & metadata_snapshot) +{ + auto all_columns = metadata_snapshot->getColumns(); + for (const auto & sort_column : sort_description) + { + if (!all_columns.has(sort_column.column_name)) + return true; + } + return false; +} size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( const MergeTreeData::DataPartsVector & parts, @@ -1065,6 +1076,7 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( const size_t min_marks_per_stream = (sum_marks - 1) / num_streams + 1; bool need_preliminary_merge = (parts.size() > settings.read_in_order_two_level_merge_threshold); + size_t max_output_ports = 0; for (size_t i = 0; i < num_streams && !parts.empty(); ++i) { @@ -1174,25 +1186,43 @@ Pipe MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( }); } - if (pipe.numOutputPorts() > 1 && need_preliminary_merge) + max_output_ports = std::max(pipe.numOutputPorts(), max_output_ports); + res.emplace_back(std::move(pipe)); + } + + if (need_preliminary_merge) + { + /// If ORDER BY clause of the query contains some expression, + /// then those new columns should be added for the merge step, + /// and this should be done always, if there is at least one pipe that + /// has multiple output ports. 
+ bool sorting_key_has_expression = sortingDescriptionHasExpressions(input_order_info->order_key_prefix_descr, metadata_snapshot); + bool force_sorting_key_transform = res.size() > 1 && max_output_ports > 1 && sorting_key_has_expression; + + for (auto & pipe : res) { SortDescription sort_description; - for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) - sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j], - input_order_info->direction, 1); - /// Drop temporary columns, added by 'sorting_key_prefix_expr' - out_projection = createProjection(pipe, data); - pipe.addSimpleTransform([sorting_key_prefix_expr](const Block & header) + if (pipe.numOutputPorts() > 1 || force_sorting_key_transform) { - return std::make_shared(header, sorting_key_prefix_expr); - }); + for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) + sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j], + input_order_info->direction, 1); - pipe.addTransform(std::make_shared( - pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size)); + /// Drop temporary columns, added by 'sorting_key_prefix_expr' + out_projection = createProjection(pipe, data); + pipe.addSimpleTransform([sorting_key_prefix_expr](const Block & header) + { + return std::make_shared(header, sorting_key_prefix_expr); + }); + } + + if (pipe.numOutputPorts() > 1) + { + pipe.addTransform(std::make_shared( + pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size)); + } } - - res.emplace_back(std::move(pipe)); } return Pipe::unitePipes(std::move(res)); From f7c77b4a25772dde0a88008e26b4e80b23bbfad7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 4 Nov 2020 13:14:23 +0300 Subject: [PATCH 32/92] Fix unit tests --- src/Common/tests/gtest_global_register.h | 15 +++++++++++ src/Functions/tests/gtest_abtesting.cpp | 27 +++++++++++-------- .../MySQL/tests/gtest_create_rewritten.cpp | 4 ++- 
src/Storages/tests/gtest_storage_log.cpp | 3 +++ ..._transform_query_for_external_database.cpp | 2 +- 5 files changed, 38 insertions(+), 13 deletions(-) create mode 100644 src/Common/tests/gtest_global_register.h diff --git a/src/Common/tests/gtest_global_register.h b/src/Common/tests/gtest_global_register.h new file mode 100644 index 00000000000..c4bde825109 --- /dev/null +++ b/src/Common/tests/gtest_global_register.h @@ -0,0 +1,15 @@ +#pragma once + +#include +#include + + +inline void tryRegisterFunctions() +{ + static struct Register { Register() { DB::registerFunctions(); } } registered; +} + +inline void tryRegisterFormats() +{ + static struct Register { Register() { DB::registerFormats(); } } registered; +} diff --git a/src/Functions/tests/gtest_abtesting.cpp b/src/Functions/tests/gtest_abtesting.cpp index b388a187479..e7ef5b5c3cf 100644 --- a/src/Functions/tests/gtest_abtesting.cpp +++ b/src/Functions/tests/gtest_abtesting.cpp @@ -10,39 +10,44 @@ Variants test_bayesab(std::string dist, PODArray xs, PODArray { Variants variants; - std::cout << std::fixed; + //std::cout << std::fixed; if (dist == "beta") { - std::cout << dist << "\nclicks: "; - for (auto x : xs) std::cout << x << " "; +/* std::cout << dist << "\nclicks: "; + for (auto x : xs) + std::cout << x << " "; std::cout <<"\tconversions: "; - for (auto y : ys) std::cout << y << " "; + for (auto y : ys) + std::cout << y << " "; - std::cout << "\n"; + std::cout << "\n";*/ variants = bayesian_ab_test(dist, xs, ys); } else if (dist == "gamma") { - std::cout << dist << "\nclicks: "; - for (auto x : xs) std::cout << x << " "; +/* std::cout << dist << "\nclicks: "; + for (auto x : xs) + std::cout << x << " "; std::cout <<"\tcost: "; - for (auto y : ys) std::cout << y << " "; + for (auto y : ys) + std::cout << y << " "; + + std::cout << "\n";*/ - std::cout << "\n"; variants = bayesian_ab_test(dist, xs, ys); } - for (size_t i = 0; i < variants.size(); ++i) +/* for (size_t i = 0; i < variants.size(); ++i) 
std::cout << i << " beats 0: " << variants[i].beats_control << std::endl; for (size_t i = 0; i < variants.size(); ++i) std::cout << i << " to be best: " << variants[i].best << std::endl; std::cout << convertToJson({"0", "1", "2"}, variants) << std::endl; - +*/ Float64 max_val = 0.0, min_val = 2.0; for (size_t i = 0; i < variants.size(); ++i) { diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index b940e4e0c95..2221b7b1588 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -12,7 +12,9 @@ #include #include #include -#include +#include +#include + using namespace DB; diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 8de14b53471..7dc140c2da8 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -127,6 +128,8 @@ std::string readData(DB::StoragePtr & table, const DB::Context & context) sample.insert(std::move(col)); } + tryRegisterFormats(); + std::ostringstream ss; WriteBufferFromOStream out_buf(ss); BlockOutputStreamPtr output = FormatFactory::instance().getOutput("Values", out_buf, sample, context); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 31fc49582ad..48811c1c86a 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include using namespace DB; From ff8f80b6b6d446a6daff9d1962da081197964fc9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 4 Nov 2020 13:34:47 +0300 Subject: [PATCH 33/92] Remove old file --- .../tests/gtest_global_register_functions.h | 18 
------------------ 1 file changed, 18 deletions(-) delete mode 100644 src/Common/tests/gtest_global_register_functions.h diff --git a/src/Common/tests/gtest_global_register_functions.h b/src/Common/tests/gtest_global_register_functions.h deleted file mode 100644 index 5ca4d64522e..00000000000 --- a/src/Common/tests/gtest_global_register_functions.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include -#include - -struct RegisteredFunctionsState -{ - RegisteredFunctionsState() - { - DB::registerFunctions(); - } - - RegisteredFunctionsState(RegisteredFunctionsState &&) = default; -}; - -inline void tryRegisterFunctions() -{ - static RegisteredFunctionsState registered_functions_state; -} From 7ec73e1f206f34627e48279a7d1eb9da3646e0ef Mon Sep 17 00:00:00 2001 From: feng lv Date: Wed, 4 Nov 2020 11:21:59 +0000 Subject: [PATCH 34/92] fix build --- src/Interpreters/TableJoin.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index cd837cc15d6..5db914bc457 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -28,8 +28,6 @@ TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_) , temporary_files_codec(settings.temporary_files_codec) , tmp_volume(tmp_volume_) { - if (settings.partial_merge_join) - join_algorithm = JoinAlgorithm::PREFER_PARTIAL_MERGE; } void TableJoin::resetCollected() From 269e96f17ee8ffd0f02081d20b004fb6bfbe7cf0 Mon Sep 17 00:00:00 2001 From: feng lv Date: Wed, 4 Nov 2020 12:15:31 +0000 Subject: [PATCH 35/92] fix --- src/Interpreters/TreeRewriter.cpp | 8 +++++--- ...8_setting_aggregate_functions_null_for_empty.reference | 8 ++++++++ .../01528_setting_aggregate_functions_null_for_empty.sql | 8 ++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 7c37365a384..c8691c25f1b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ 
b/src/Interpreters/TreeRewriter.cpp @@ -120,10 +120,12 @@ struct CustomizeAggregateFunctionsSuffixData void visit(ASTFunction & func, ASTPtr &) { - if (AggregateFunctionFactory::instance().isAggregateFunctionName(func.name) - && !endsWith(func.name, customized_func_suffix)) + const auto & instance = AggregateFunctionFactory::instance(); + if (instance.isAggregateFunctionName(func.name) && !endsWith(func.name, customized_func_suffix)) { - func.name = func.name + customized_func_suffix; + auto properties = instance.tryGetProperties(func.name); + if (properties && !properties->returns_default_when_only_null) + func.name = func.name + customized_func_suffix; } } }; diff --git a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference index 570ea20ffad..9c6ae9c65ab 100644 --- a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference +++ b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.reference @@ -1,8 +1,16 @@ 0 \N +0 +\N \N \N +0 +\N 45 45 +10 +10 45 45 +10 +10 diff --git a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql index c59b592e701..e76ce667bbc 100644 --- a/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql +++ b/tests/queries/0_stateless/01528_setting_aggregate_functions_null_for_empty.sql @@ -7,11 +7,15 @@ CREATE TABLE defaults SELECT sum(n) FROM defaults; SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; SET aggregate_functions_null_for_empty=1; SELECT sum(n) FROM defaults; SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; INSERT INTO defaults SELECT * FROM numbers(10); @@ -19,10 +23,14 @@ SET aggregate_functions_null_for_empty=0; SELECT sum(n) 
FROM defaults; SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; SET aggregate_functions_null_for_empty=1; SELECT sum(n) FROM defaults; SELECT sumOrNull(n) FROM defaults; +SELECT count(n) FROM defaults; +SELECT countOrNull(n) FROM defaults; DROP TABLE defaults; From b1e75ec6f5b34de8ac8d53f8f7a4a1110edd2f84 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 4 Nov 2020 23:43:11 +0300 Subject: [PATCH 36/92] CreateQuery compound column type indendation fix --- src/Parsers/ASTColumnDeclaration.cpp | 6 +++++- .../0_stateless/01458_named_tuple_millin.reference | 4 ++-- .../queries/0_stateless/01458_named_tuple_millin.sql | 4 +--- ...548_create_table_compound_column_format.reference | 12 ++++++++++++ .../01548_create_table_compound_column_format.sh | 8 ++++++++ 5 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/01548_create_table_compound_column_format.reference create mode 100755 tests/queries/0_stateless/01548_create_table_compound_column_format.sh diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 730e892f8f7..0e0847713c2 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -55,7 +55,11 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta if (type) { settings.ostr << ' '; - type->formatImpl(settings, state, frame); + + FormatStateStacked typeFrame = frame; + typeFrame.indent = 0; + + type->formatImpl(settings, state, typeFrame); } if (null_modifier) diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.reference b/tests/queries/0_stateless/01458_named_tuple_millin.reference index b826566c74b..d6d6d7ae8d4 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.reference +++ b/tests/queries/0_stateless/01458_named_tuple_millin.reference @@ -1,12 +1,12 @@ CREATE TABLE default.tuple ( - `j` Tuple( a Int8, b String) + `j` Tuple(a Int8, b 
String) ) ENGINE = Memory j Tuple(a Int8, b String) CREATE TABLE default.tuple ( - `j` Tuple( a Int8, b String) + `j` Tuple(a Int8, b String) ) ENGINE = Memory j Tuple(a Int8, b String) diff --git a/tests/queries/0_stateless/01458_named_tuple_millin.sql b/tests/queries/0_stateless/01458_named_tuple_millin.sql index 7687dd4c158..ea730e65bb7 100644 --- a/tests/queries/0_stateless/01458_named_tuple_millin.sql +++ b/tests/queries/0_stateless/01458_named_tuple_millin.sql @@ -10,9 +10,7 @@ SHOW CREATE TABLE tuple FORMAT TSVRaw; DESC tuple; DROP TABLE tuple; -CREATE TABLE tuple -ENGINE = Memory AS -SELECT CAST((1, 'Test'), 'Tuple(a Int8, b String)') AS j; +CREATE TABLE tuple ENGINE = Memory AS SELECT CAST((1, 'Test'), 'Tuple(a Int8, b String)') AS j; SHOW CREATE TABLE tuple FORMAT TSVRaw; DESC tuple; diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.reference b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference new file mode 100644 index 00000000000..c6c4dcdfa4a --- /dev/null +++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.reference @@ -0,0 +1,12 @@ +CREATE TABLE test +( + `a` Int64, + `b` NESTED(a Int64) +) +ENGINE = TinyLog +CREATE TABLE test +( + `a` Int64, + `b` TUPLE(a Int64) +) +ENGINE = TinyLog diff --git a/tests/queries/0_stateless/01548_create_table_compound_column_format.sh b/tests/queries/0_stateless/01548_create_table_compound_column_format.sh new file mode 100755 index 00000000000..6c9384e01c1 --- /dev/null +++ b/tests/queries/0_stateless/01548_create_table_compound_column_format.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +echo "CREATE TABLE test(a Int64, b NESTED(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT + +echo "CREATE TABLE test(a Int64, b TUPLE(a Int64)) ENGINE=TinyLog" | $CLICKHOUSE_FORMAT \ No newline at end of file From d5a51ddbbfef08b305f592e90af1b116c1eeac94 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 4 Nov 2020 23:44:43 +0300 Subject: [PATCH 37/92] Update CMakeLists.txt --- programs/odbc-bridge/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 043c0d5dd7d..8e7ccfedc70 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -23,7 +23,7 @@ add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE daemon - clickhouse_common_io + dbms Poco::Data Poco::Data::ODBC ) From 159cd5ef72dd7e7473975adad22e43a87bc15cf1 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 5 Nov 2020 15:32:03 +0300 Subject: [PATCH 38/92] add --database arg to CLICKHOUSE_BENCHMARK --- tests/queries/shell_config.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 9249fbc0411..0b5b0940cd7 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -7,6 +7,7 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} " [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} " +[ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} " export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"} [ -x "$CLICKHOUSE_BINARY-client" 
] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} @@ -17,7 +18,7 @@ export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICK [ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY} local"} export CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"} export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfuscator"} -export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark"} +export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark ${CLICKHOUSE_BENCHMARK_OPT0:-}"} export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:="/etc/clickhouse-server/config.xml"} export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:="/etc/clickhouse-client/config.xml"} From e53f604924deaca2d9930471b417be6eb5ea1503 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Thu, 5 Nov 2020 21:43:18 +0300 Subject: [PATCH 39/92] Update 01304_direct_io.sh --- tests/queries/0_stateless/01304_direct_io.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01304_direct_io.sh b/tests/queries/0_stateless/01304_direct_io.sh index dcf2adbd64f..244e4c6e02d 100755 --- a/tests/queries/0_stateless/01304_direct_io.sh +++ b/tests/queries/0_stateless/01304_direct_io.sh @@ -9,7 +9,7 @@ $CLICKHOUSE_CLIENT --multiquery --query " INSERT INTO bug SELECT rand64(), '2020-06-07' FROM numbers(50000000); OPTIMIZE TABLE bug FINAL;" -$CLICKHOUSE_BENCHMARK --database "$CLICKHOUSE_DATABASE" --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$CLICKHOUSE_TMP"/err +$CLICKHOUSE_BENCHMARK --iterations 10 --max_threads 100 --min_bytes_to_use_direct_io 1 <<< "SELECT sum(UserID) FROM bug PREWHERE NOT ignore(Date)" 1>/dev/null 2>"$CLICKHOUSE_TMP"/err cat "$CLICKHOUSE_TMP"/err | grep Exception cat "$CLICKHOUSE_TMP"/err | grep Loaded From 
b2e17916e43688aca6f347787d044c48960f78d4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 5 Nov 2020 23:04:34 +0300 Subject: [PATCH 40/92] Mask password in data_path in the system.distribution_queue --- .../test_cluster_with_incorrect_pw.xml | 1 + .../System/StorageSystemDistributionQueue.cpp | 71 ++++++++++++++++++- .../test_cluster_with_incorrect_pw.xml | 14 ++++ tests/config/install.sh | 1 + ...5_system_distribution_queue_mask.reference | 4 ++ .../01555_system_distribution_queue_mask.sql | 36 ++++++++++ .../queries/0_stateless/arcadia_skip_list.txt | 1 + 7 files changed, 127 insertions(+), 1 deletion(-) create mode 120000 programs/server/config.d/test_cluster_with_incorrect_pw.xml create mode 100644 tests/config/config.d/test_cluster_with_incorrect_pw.xml create mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.reference create mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.sql diff --git a/programs/server/config.d/test_cluster_with_incorrect_pw.xml b/programs/server/config.d/test_cluster_with_incorrect_pw.xml new file mode 120000 index 00000000000..4e4b334c6d1 --- /dev/null +++ b/programs/server/config.d/test_cluster_with_incorrect_pw.xml @@ -0,0 +1 @@ +../../../tests/config/config.d/test_cluster_with_incorrect_pw.xml \ No newline at end of file diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 39ccea64e26..786bd8bf967 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -10,6 +10,75 @@ #include #include +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +} + + +namespace +{ + +using namespace DB; + +/// Drop "password" from the path. 
+/// +/// In case of use_compact_format_in_distributed_parts_names=0 the path format is: +/// +/// user[:password]@host:port#default_database format +/// +/// And password should be masked out. +/// +/// See: +/// - Cluster::Address::fromFullString() +/// - Cluster::Address::toFullString() +std::string maskDataPath(const std::string & path) +{ + std::string masked_path = path; + + if (!masked_path.ends_with('/')) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid path format"); + + masked_path.pop_back(); + + size_t dir_name_pos = masked_path.rfind('/'); + if (dir_name_pos == std::string::npos) + { + /// Do not include full path into the exception message since it may include password. + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid path format"); + } + ++dir_name_pos; + + size_t user_pw_end = masked_path.find('@', dir_name_pos); + if (user_pw_end == std::string::npos) + { + /// Likey new format (use_compact_format_in_distributed_parts_names=1) + return path; + } + + size_t pw_start = masked_path.find(':', dir_name_pos); + if (pw_start > user_pw_end) + { + /// No password in path + return path; + } + ++pw_start; + + size_t pw_length = user_pw_end - pw_start; + /// Replace with a single '*' to hide even the password length. 
+ masked_path.replace(pw_start, pw_length, 1, '*'); + + masked_path.push_back('/'); + + return masked_path; +} + +} namespace DB { @@ -103,7 +172,7 @@ void StorageSystemDistributionQueue::fillData(MutableColumns & res_columns, cons size_t col_num = 0; res_columns[col_num++]->insert(database); res_columns[col_num++]->insert(table); - res_columns[col_num++]->insert(status.path); + res_columns[col_num++]->insert(maskDataPath(status.path)); res_columns[col_num++]->insert(status.is_blocked); res_columns[col_num++]->insert(status.error_count); res_columns[col_num++]->insert(status.files_count); diff --git a/tests/config/config.d/test_cluster_with_incorrect_pw.xml b/tests/config/config.d/test_cluster_with_incorrect_pw.xml new file mode 100644 index 00000000000..79d122f7b9b --- /dev/null +++ b/tests/config/config.d/test_cluster_with_incorrect_pw.xml @@ -0,0 +1,14 @@ + + + + + + localhost + 9000 + + foo + + + + + diff --git a/tests/config/install.sh b/tests/config/install.sh index ff96e46c947..f6fae181ac8 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -27,6 +27,7 @@ ln -sf $SRC_PATH/config.d/secure_ports.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/clusters.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/graphite.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/database_atomic.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/test_cluster_with_incorrect_pw.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/users.d/log_queries.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/readonly.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/users.d/access_management.xml $DEST_SERVER_PATH/users.d/ diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference new file mode 100644 index 00000000000..fda87e4d99e --- /dev/null +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference @@ -0,0 +1,4 @@ 
+masked +2,"default:*@localhost:9000" +no masking +1,"default@localhost:9000" diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql new file mode 100644 index 00000000000..0143b8e46ed --- /dev/null +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -0,0 +1,36 @@ +-- force data path with the user/pass in it +set use_compact_format_in_distributed_parts_names=0; +-- use async send even for localhost +set prefer_localhost_replica=0; + +drop table if exists dist_01555; +drop table if exists data_01555; +create table data_01555 (key Int) Engine=Null(); + +-- +-- masked +-- +SELECT 'masked'; +create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key); + +insert into dist_01555 values (1)(2); +-- since test_cluster_with_incorrect_pw contains incorrect password ignore error +system flush distributed dist_01555; -- { serverError 516; } +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; + +drop table dist_01555; + +-- +-- no masking +-- +SELECT 'no masking'; +create table dist_01555 (key Int) Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key); + +insert into dist_01555 values (1)(2); +-- since test_cluster_with_incorrect_pw contains incorrect password ignore error +system flush distributed dist_01555; +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; + +-- cleanup +drop table dist_01555; +drop table data_01555; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 900cc82b33f..f35f5bde6a9 100644 --- 
a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -159,4 +159,5 @@ 01547_query_log_current_database 01548_query_log_query_execution_ms 01552_dict_fixedstring +01555_system_distribution_queue_mask 01557_max_parallel_replicas_no_sample.sql From 30bf5e6d2610ec071b117d70028dfe58eb828477 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 22 Oct 2020 14:18:10 +0800 Subject: [PATCH 41/92] Prune partition in verbatim way. --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 15 ++++-- src/Storages/MergeTree/PartitionPruner.h | 53 +++++++++++++++++++ src/Storages/StorageMergeTree.cpp | 24 ++------- src/Storages/StorageReplicatedMergeTree.cpp | 22 ++------ ...01540_verbatim_partition_pruning.reference | 1 + .../01540_verbatim_partition_pruning.sql | 11 ++++ 6 files changed, 85 insertions(+), 41 deletions(-) create mode 100644 src/Storages/MergeTree/PartitionPruner.h create mode 100644 tests/queries/0_stateless/01540_verbatim_partition_pruning.reference create mode 100644 tests/queries/0_stateless/01540_verbatim_partition_pruning.sql diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2ca989e12e6..1326ef68759 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -226,13 +227,15 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( } std::optional minmax_idx_condition; + std::optional partition_pruner; if (data.minmax_idx_expr) { minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context); - if (settings.force_index_by_date && minmax_idx_condition->alwaysUnknownOrTrue()) + if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && 
partition_pruner->isUseless())) { - String msg = "MinMax index by columns ("; + String msg = "Neither MinMax index by columns ("; bool first = true; for (const String & col : data.minmax_idx_columns) { @@ -242,7 +245,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( msg += ", "; msg += col; } - msg += ") is not used and setting 'force_index_by_date' is set"; + msg += ") nor partition expr is used and setting 'force_index_by_date' is set"; throw Exception(msg, ErrorCodes::INDEX_NOT_USED); } @@ -266,6 +269,12 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( part->minmax_idx.hyperrectangle, data.minmax_idx_column_types).can_be_true) continue; + if (partition_pruner) + { + if (partition_pruner->canBePruned(part)) + continue; + } + if (max_block_numbers_to_read) { auto blocks_iterator = max_block_numbers_to_read->find(part->info.partition_id); diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h new file mode 100644 index 00000000000..bb39c6d50fe --- /dev/null +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -0,0 +1,53 @@ +#pragma once + +#include + +#include +#include +#include + +namespace DB +{ + +class PartitionPruner +{ +private: + std::unordered_map partition_filter_map; + const KeyDescription & partition_key; + KeyCondition partition_condition; + bool useless; + using DataPart = IMergeTreeDataPart; + using DataPartPtr = std::shared_ptr; + +public: + PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context) + : partition_key(partition_key_) + , partition_condition( + query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, true /* strict */) + , useless(partition_condition.alwaysUnknownOrTrue()) + { + } + + bool canBePruned(DataPartPtr part) + { + if (part->isEmpty()) + return true; + const auto & partition_id = part->info.partition_id; + bool is_valid; + if (auto it = partition_filter_map.find(partition_id); it 
!= partition_filter_map.end()) + is_valid = it->second; + else + { + const auto & partition_value = part->partition.value; + std::vector index_value(partition_value.begin(), partition_value.end()); + is_valid = partition_condition.mayBeTrueInRange( + partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); + partition_filter_map.emplace(partition_id, is_valid); + } + return !is_valid; + } + + bool isUseless() const { return useless; } +}; + +} diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 11e8859e76c..3d0f4fa4029 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -192,31 +193,14 @@ std::optional StorageMergeTree::totalRows() const std::optional StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & partition_key = metadata_snapshot->getPartitionKey(); - Names partition_key_columns = partition_key.column_names; - KeyCondition key_condition( - query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */); - if (key_condition.alwaysUnknownOrTrue()) + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context); + if (partition_pruner.isUseless()) return {}; - std::unordered_map partition_filter_map; size_t res = 0; auto lock = lockParts(); for (const auto & part : getDataPartsStateRange(DataPartState::Committed)) { - if (part->isEmpty()) - continue; - const auto & partition_id = part->info.partition_id; - bool is_valid; - if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) - is_valid = it->second; - else - { - const auto & partition_value = part->partition.value; - std::vector index_value(partition_value.begin(), 
partition_value.end()); - is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); - partition_filter_map.emplace(partition_id, is_valid); - } - if (is_valid) + if (!partition_pruner.canBePruned(part)) res += part->rows_count; } return res; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7412031c595..3554755db77 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -3662,28 +3663,13 @@ std::optional StorageReplicatedMergeTree::totalRows() const std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & partition_key = metadata_snapshot->getPartitionKey(); - Names partition_key_columns = partition_key.column_names; - KeyCondition key_condition( - query_info, context, partition_key_columns, partition_key.expression, true /* single_point */, true /* strict */); - if (key_condition.alwaysUnknownOrTrue()) + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context); + if (partition_pruner.isUseless()) return {}; - std::unordered_map partition_filter_map; size_t res = 0; foreachCommittedParts([&](auto & part) { - const auto & partition_id = part->info.partition_id; - bool is_valid; - if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) - is_valid = it->second; - else - { - const auto & partition_value = part->partition.value; - std::vector index_value(partition_value.begin(), partition_value.end()); - is_valid = key_condition.mayBeTrueInRange(partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); - partition_filter_map.emplace(partition_id, is_valid); - 
} - if (is_valid) + if (!partition_pruner.canBePruned(part)) res += part->rows_count; }); return res; diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql new file mode 100644 index 00000000000..6498ed99766 --- /dev/null +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql @@ -0,0 +1,11 @@ +drop table if exists xy; + +create table xy(x int, y int) engine MergeTree partition by intHash64(x) % 100 order by y settings index_granularity = 1; + +insert into xy values (1, 2), (2, 3); + +SET max_rows_to_read = 1; + +select * from xy where intHash64(x) % 100 = intHash64(1) % 100; + +drop table if exists xy; From aa436a3cb174cdd5e31cb885dfe3d94872354e0a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 6 Nov 2020 11:50:58 +0800 Subject: [PATCH 42/92] Transform single point --- src/Interpreters/ExpressionActions.h | 1 + src/Storages/MergeTree/KeyCondition.cpp | 144 +++++++++++++++--- src/Storages/MergeTree/KeyCondition.h | 3 + src/Storages/MergeTree/PartitionPruner.cpp | 25 +++ src/Storages/MergeTree/PartitionPruner.h | 20 +-- src/Storages/ya.make | 1 + ...01540_verbatim_partition_pruning.reference | 1 + .../01540_verbatim_partition_pruning.sql | 3 + 8 files changed, 162 insertions(+), 36 deletions(-) create mode 100644 src/Storages/MergeTree/PartitionPruner.cpp diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index b35f8972c97..bd55de9484e 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -53,6 +53,7 @@ using ArrayJoinActionPtr = std::shared_ptr; */ struct ExpressionAction { + 
friend class KeyCondition; private: using ExpressionActionsPtr = std::shared_ptr; public: diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7cb872f174a..220321ce5c5 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -485,6 +486,29 @@ static std::pair applyFunctionForFieldOfUnknownType( } +/// Same as above but for binary operators +static std::pair applyBinaryFunctionForFieldOfUnknownType( + const FunctionOverloadResolverPtr & func, + const DataTypePtr & arg_type, + const Field & arg_value, + const DataTypePtr & arg_type2, + const Field & arg_value2) +{ + ColumnsWithTypeAndName arguments{ + {arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}}; + + FunctionBasePtr func_base = func->build(arguments); + + DataTypePtr return_type = func_base->getResultType(); + + auto col = func_base->execute(arguments, return_type, 1); + + Field result = (*col)[0]; + + return {std::move(result), std::move(return_type)}; +} + + static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field) { /// Fallback for fields without block reference. 
@@ -616,6 +640,70 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( return found_transformation; } +/// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)` +bool KeyCondition::canConstantBeWrappedByFunctions( + const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) +{ + String expr_name = node->getColumnName(); + const auto & sample_block = key_expr->getSampleBlock(); + if (!sample_block.has(expr_name)) + return false; + + bool found_transformation = false; + auto input_column = sample_block.getByName(expr_name); + auto const_column = out_type->createColumnConst(1, out_value); + out_value = (*castColumn({const_column, out_type, "c"}, input_column.type))[0]; + out_type = input_column.type; + Block transform({{input_column.type->createColumn(), input_column.type, input_column.name}}); + for (const ExpressionAction & action : key_expr->getActions()) + { + const auto & argument_names = action.argument_names; + if (action.type == ExpressionAction::Type::APPLY_FUNCTION) + { + if (!action.function_base->isDeterministic()) + return false; + if (argument_names.size() == 1 && argument_names[0] == expr_name) + { + std::tie(out_value, out_type) = applyFunctionForFieldOfUnknownType(action.function_builder, out_type, out_value); + } + else if (argument_names.size() == 2) + { + if (!transform.has(argument_names[0]) || !transform.has(argument_names[1])) + return false; + auto left = transform.getByName(argument_names[0]); + auto right = transform.getByName(argument_names[1]); + if (isColumnConst(*left.column)) + { + auto left_arg_type = left.type; + auto left_arg_value = (*left.column)[0]; + std::tie(out_value, out_type) = applyBinaryFunctionForFieldOfUnknownType( + action.function_builder, left_arg_type, left_arg_value, out_type, out_value); + } + else if (isColumnConst(*right.column)) + { + auto right_arg_type = right.type; + auto 
right_arg_value = (*right.column)[0]; + std::tie(out_value, out_type) = applyBinaryFunctionForFieldOfUnknownType( + action.function_builder, out_type, out_value, right_arg_type, right_arg_value); + } + } + + expr_name = action.result_name; + auto it = key_columns.find(expr_name); + if (key_columns.end() != it) + { + out_key_column_num = it->second; + out_key_column_type = sample_block.getByName(it->first).type; + found_transformation = true; + break; + } + } + action.execute(transform, true); + } + + return found_transformation; +} + bool KeyCondition::tryPrepareSetIndex( const ASTs & args, const Context & context, @@ -868,27 +956,47 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont key_arg_pos = 0; is_set_const = true; } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + else if (getConstant(args[1], block_with_constants, const_value, const_type)) { - key_arg_pos = 0; + if (isKeyPossiblyWrappedByMonotonicFunctions(args[0], context, key_column_num, key_expr_type, chain)) + { + key_arg_pos = 0; + } + else if (canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else if ( + single_point && func_name == "equals" + && canConstantBeWrappedByFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else + return false; } - else if (getConstant(args[1], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[0], key_column_num, key_expr_type, const_value, const_type)) + else if (getConstant(args[0], block_with_constants, const_value, const_type)) { - key_arg_pos = 0; - is_constant_transformed = true; - } - else if (getConstant(args[0], block_with_constants, const_value, 
const_type) - && isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) - { - key_arg_pos = 1; - } - else if (getConstant(args[0], block_with_constants, const_value, const_type) - && canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) - { - key_arg_pos = 1; - is_constant_transformed = true; + if (isKeyPossiblyWrappedByMonotonicFunctions(args[1], context, key_column_num, key_expr_type, chain)) + { + key_arg_pos = 1; + } + else if (canConstantBeWrappedByMonotonicFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 1; + is_constant_transformed = true; + } + else if ( + single_point && func_name == "equals" + && canConstantBeWrappedByFunctions(args[1], key_column_num, key_expr_type, const_value, const_type)) + { + key_arg_pos = 0; + is_constant_transformed = true; + } + else + return false; } else return false; diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index 909ec01bf9f..265bc01be49 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -402,6 +402,9 @@ private: Field & out_value, DataTypePtr & out_type); + bool canConstantBeWrappedByFunctions( + const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type); + /// If it's possible to make an RPNElement /// that will filter values (possibly tuples) by the content of 'prepared_set', /// do it and return true. 
diff --git a/src/Storages/MergeTree/PartitionPruner.cpp b/src/Storages/MergeTree/PartitionPruner.cpp new file mode 100644 index 00000000000..8888367ebe5 --- /dev/null +++ b/src/Storages/MergeTree/PartitionPruner.cpp @@ -0,0 +1,25 @@ +#include + +namespace DB +{ + +bool PartitionPruner::canBePruned(const DataPartPtr & part) +{ + if (part->isEmpty()) + return true; + const auto & partition_id = part->info.partition_id; + bool is_valid; + if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) + is_valid = it->second; + else + { + const auto & partition_value = part->partition.value; + std::vector index_value(partition_value.begin(), partition_value.end()); + is_valid = partition_condition.mayBeTrueInRange( + partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); + partition_filter_map.emplace(partition_id, is_valid); + } + return !is_valid; +} + +} diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index bb39c6d50fe..8e5c1ac7551 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -9,6 +9,7 @@ namespace DB { +/// Pruning partitions in verbatim way using KeyCondition class PartitionPruner { private: @@ -28,24 +29,7 @@ public: { } - bool canBePruned(DataPartPtr part) - { - if (part->isEmpty()) - return true; - const auto & partition_id = part->info.partition_id; - bool is_valid; - if (auto it = partition_filter_map.find(partition_id); it != partition_filter_map.end()) - is_valid = it->second; - else - { - const auto & partition_value = part->partition.value; - std::vector index_value(partition_value.begin(), partition_value.end()); - is_valid = partition_condition.mayBeTrueInRange( - partition_value.size(), index_value.data(), index_value.data(), partition_key.data_types); - partition_filter_map.emplace(partition_id, is_valid); - } - return !is_valid; - } + bool canBePruned(const DataPartPtr & part); bool 
isUseless() const { return useless; } }; diff --git a/src/Storages/ya.make b/src/Storages/ya.make index 0f14826d859..e0c6cab602f 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -85,6 +85,7 @@ SRCS( MergeTree/MergeType.cpp MergeTree/MergedBlockOutputStream.cpp MergeTree/MergedColumnOnlyOutputStream.cpp + MergeTree/PartitionPruner.cpp MergeTree/ReplicatedFetchList.cpp MergeTree/ReplicatedMergeTreeAddress.cpp MergeTree/ReplicatedMergeTreeAltersSequence.cpp diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference index 9874d6464ab..46256e7cdab 100644 --- a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference @@ -1 +1,2 @@ 1 2 +1 2 diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql index 6498ed99766..bde90422dbc 100644 --- a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql @@ -8,4 +8,7 @@ SET max_rows_to_read = 1; select * from xy where intHash64(x) % 100 = intHash64(1) % 100; +-- This works too +select * from xy where x = 1; + drop table if exists xy; From 2b0085c1061b77eb6a4271b7a4c91b641c46a5aa Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 6 Nov 2020 19:18:42 +0800 Subject: [PATCH 43/92] Pruning is different from counting --- src/Storages/MergeTree/KeyCondition.cpp | 27 ++++++++++++++++--- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- src/Storages/MergeTree/PartitionPruner.h | 4 +-- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 220321ce5c5..7920c831d14 100644 --- 
a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -592,7 +592,15 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( if (!sample_block.has(expr_name)) return false; + /// TODO Nullable index is not yet landed. + if (out_value.isNull()) + return false; + bool found_transformation = false; + auto input_column = sample_block.getByName(expr_name); + auto const_column = out_type->createColumnConst(1, out_value); + out_value = (*castColumn({const_column, out_type, "c"}, input_column.type))[0]; + out_type = input_column.type; for (const ExpressionAction & action : key_expr->getActions()) { /** The key functional expression constraint may be inferred from a plain column in the expression. @@ -644,11 +652,18 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions( bool KeyCondition::canConstantBeWrappedByFunctions( const ASTPtr & node, size_t & out_key_column_num, DataTypePtr & out_key_column_type, Field & out_value, DataTypePtr & out_type) { + if (strict) + return false; + String expr_name = node->getColumnName(); const auto & sample_block = key_expr->getSampleBlock(); if (!sample_block.has(expr_name)) return false; + /// TODO Nullable index is not yet landed. 
+ if (out_value.isNull()) + return false; + bool found_transformation = false; auto input_column = sample_block.getByName(expr_name); auto const_column = out_type->createColumnConst(1, out_value); @@ -950,11 +965,15 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, const Context & cont bool is_set_const = false; bool is_constant_transformed = false; - if (functionIsInOrGlobalInOperator(func_name) - && tryPrepareSetIndex(args, context, out, key_column_num)) + if (functionIsInOrGlobalInOperator(func_name)) { - key_arg_pos = 0; - is_set_const = true; + if (tryPrepareSetIndex(args, context, out, key_column_num)) + { + key_arg_pos = 0; + is_set_const = true; + } + else + return false; } else if (getConstant(args[1], block_with_constants, const_value, const_type)) { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 1326ef68759..8b6b52cb707 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -231,7 +231,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (data.minmax_idx_expr) { minmax_idx_condition.emplace(query_info, context, data.minmax_idx_columns, data.minmax_idx_expr); - partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context); + partition_pruner.emplace(metadata_snapshot->getPartitionKey(), query_info, context, false /* strict */); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { diff --git a/src/Storages/MergeTree/PartitionPruner.h b/src/Storages/MergeTree/PartitionPruner.h index 8e5c1ac7551..74b02d671bb 100644 --- a/src/Storages/MergeTree/PartitionPruner.h +++ b/src/Storages/MergeTree/PartitionPruner.h @@ -21,10 +21,10 @@ private: using DataPartPtr = std::shared_ptr; public: - PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context) + 
PartitionPruner(const KeyDescription & partition_key_, const SelectQueryInfo & query_info, const Context & context, bool strict) : partition_key(partition_key_) , partition_condition( - query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, true /* strict */) + query_info, context, partition_key.column_names, partition_key.expression, true /* single_point */, strict) , useless(partition_condition.alwaysUnknownOrTrue()) { } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 3d0f4fa4029..8a57f3da1f7 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -193,7 +193,7 @@ std::optional StorageMergeTree::totalRows() const std::optional StorageMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); if (partition_pruner.isUseless()) return {}; size_t res = 0; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 3554755db77..4b845b4be12 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3663,7 +3663,7 @@ std::optional StorageReplicatedMergeTree::totalRows() const std::optional StorageReplicatedMergeTree::totalRowsByPartitionPredicate(const SelectQueryInfo & query_info, const Context & context) const { auto metadata_snapshot = getInMemoryMetadataPtr(); - PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context); + PartitionPruner partition_pruner(metadata_snapshot->getPartitionKey(), query_info, context, true /* strict */); if (partition_pruner.isUseless()) return {}; size_t res = 0; From 
05ba70c91fa001eb8e97904e5f7d938fcdc7d8d1 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 14 Oct 2020 13:55:42 +0300 Subject: [PATCH 44/92] Clickhouse client added history file parameter --- docs/en/interfaces/cli.md | 1 + programs/client/Client.cpp | 3 +++ 2 files changed, 4 insertions(+) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 07ed3e5c884..42416383860 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -123,6 +123,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--stacktrace` – If specified, also print the stack trace if an exception occurs. - `--config-file` – The name of the configuration file. - `--secure` – If specified, will connect to server over secure connection. +- `--history_file` — Path to a file containing command history. - `--param_` — Value for a [query with parameters](#cli-queries-with-parameters). ### Configuration Files {#configuration_files} diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 005fece3277..57802fdbd1a 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -2329,6 +2329,7 @@ public: ("query-fuzzer-runs", po::value()->default_value(0), "query fuzzer runs") ("opentelemetry-traceparent", po::value(), "OpenTelemetry traceparent header as described by W3C Trace Context recommendation") ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") + ("history_file", po::value(), "path to history file") ; Settings cmd_settings; @@ -2485,6 +2486,8 @@ public: config().setInt("suggestion_limit", options["suggestion_limit"].as()); if (options.count("highlight")) config().setBool("highlight", options["highlight"].as()); + if (options.count("history_file")) + config().setString("history_file", options["history_file"].as()); if ((query_fuzzer_runs = options["query-fuzzer-runs"].as())) { From 044f14346354334f93e97705d555c102d7f99b76 Mon 
Sep 17 00:00:00 2001 From: tavplubix Date: Fri, 6 Nov 2020 18:53:33 +0300 Subject: [PATCH 45/92] Update ThreadPool.cpp --- src/Common/ThreadPool.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index e527e97d608..7b2c2108629 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -216,7 +216,7 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ if (!jobs.empty()) { - job = jobs.top().job; + job = std::move(jobs.top().job); jobs.pop(); } else From b072bc7d2bfe390c835ff69184e00bf041146470 Mon Sep 17 00:00:00 2001 From: Mikhail Filimonov Date: Fri, 6 Nov 2020 17:54:44 +0100 Subject: [PATCH 46/92] Test for the issue #12615 --- .../format_schemas/social.proto | 6 ++ .../test_storage_kafka/social_pb2.py | 75 +++++++++++++++ tests/integration/test_storage_kafka/test.py | 96 ++++++++++++++++++- 3 files changed, 176 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto create mode 100644 tests/integration/test_storage_kafka/social_pb2.py diff --git a/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto new file mode 100644 index 00000000000..3bf82737fa5 --- /dev/null +++ b/tests/integration/test_storage_kafka/clickhouse_path/format_schemas/social.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message User { + string username = 1; + int32 timestamp = 2; +} \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/social_pb2.py b/tests/integration/test_storage_kafka/social_pb2.py new file mode 100644 index 00000000000..eeba5efc8b1 --- /dev/null +++ b/tests/integration/test_storage_kafka/social_pb2.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: social.proto + +from google.protobuf import descriptor as _descriptor +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor.FileDescriptor( + name='social.proto', + package='', + syntax='proto3', + serialized_options=None, + serialized_pb=b'\n\x0csocial.proto\"+\n\x04User\x12\x10\n\x08username\x18\x01 \x01(\t\x12\x11\n\ttimestamp\x18\x02 \x01(\x05\x62\x06proto3' +) + + + + +_USER = _descriptor.Descriptor( + name='User', + full_name='User', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='username', full_name='User.username', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=b"".decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='timestamp', full_name='User.timestamp', index=1, + number=2, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=16, + serialized_end=59, +) + +DESCRIPTOR.message_types_by_name['User'] = _USER +_sym_db.RegisterFileDescriptor(DESCRIPTOR) + +User = _reflection.GeneratedProtocolMessageType('User', (_message.Message,), { + 'DESCRIPTOR' : _USER, + '__module__' : 'social_pb2' + # @@protoc_insertion_point(class_scope:User) + }) +_sym_db.RegisterMessage(User) + + +# @@protoc_insertion_point(module_scope) diff 
--git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 6ef37c1e231..5d943361414 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -30,6 +30,8 @@ libprotoc 3.0.0 protoc --python_out=. kafka.proto """ from . import kafka_pb2 +from . import social_pb2 + # TODO: add test for run-time offset update in CH, if we manually update it on Kafka side. # TODO: add test for SELECT LIMIT is working. @@ -115,6 +117,20 @@ def kafka_produce_protobuf_messages_no_delimeters(topic, start_index, num_messag producer.flush() print("Produced {} messages for topic {}".format(num_messages, topic)) +def kafka_produce_protobuf_social(topic, start_index, num_messages): + data = b'' + for i in range(start_index, start_index + num_messages): + msg = social_pb2.User() + msg.username='John Doe {}'.format(i) + msg.timestamp=1000000+i + serialized_msg = msg.SerializeToString() + data = data + _VarintBytes(len(serialized_msg)) + serialized_msg + producer = KafkaProducer(bootstrap_servers="localhost:9092", value_serializer=producer_serializer) + producer.send(topic=topic, value=data) + producer.flush() + print(("Produced {} messages for topic {}".format(num_messages, topic))) + + def avro_confluent_message(schema_registry_client, value): # type: (CachedSchemaRegistryClient, dict) -> str @@ -982,6 +998,84 @@ def test_kafka_protobuf(kafka_cluster): kafka_check_result(result, True) +@pytest.mark.timeout(180) +def test_kafka_string_field_on_first_position_in_protobuf(kafka_cluster): +# https://github.com/ClickHouse/ClickHouse/issues/12615 + + instance.query(''' +CREATE TABLE test.kafka ( + username String, + timestamp Int32 + ) ENGINE = Kafka() +SETTINGS + kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'string_field_on_first_position_in_protobuf', + kafka_group_name = 'string_field_on_first_position_in_protobuf', + kafka_format = 'Protobuf', + kafka_schema = 'social:User'; + + SELECT * 
FROM test.kafka; + ''') + + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 0, 20) + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 20, 1) + kafka_produce_protobuf_social('string_field_on_first_position_in_protobuf', 21, 29) + + result = instance.query('SELECT * FROM test.kafka', ignore_error=True) + expected = '''\ +John Doe 0 1000000 +John Doe 1 1000001 +John Doe 2 1000002 +John Doe 3 1000003 +John Doe 4 1000004 +John Doe 5 1000005 +John Doe 6 1000006 +John Doe 7 1000007 +John Doe 8 1000008 +John Doe 9 1000009 +John Doe 10 1000010 +John Doe 11 1000011 +John Doe 12 1000012 +John Doe 13 1000013 +John Doe 14 1000014 +John Doe 15 1000015 +John Doe 16 1000016 +John Doe 17 1000017 +John Doe 18 1000018 +John Doe 19 1000019 +John Doe 20 1000020 +John Doe 21 1000021 +John Doe 22 1000022 +John Doe 23 1000023 +John Doe 24 1000024 +John Doe 25 1000025 +John Doe 26 1000026 +John Doe 27 1000027 +John Doe 28 1000028 +John Doe 29 1000029 +John Doe 30 1000030 +John Doe 31 1000031 +John Doe 32 1000032 +John Doe 33 1000033 +John Doe 34 1000034 +John Doe 35 1000035 +John Doe 36 1000036 +John Doe 37 1000037 +John Doe 38 1000038 +John Doe 39 1000039 +John Doe 40 1000040 +John Doe 41 1000041 +John Doe 42 1000042 +John Doe 43 1000043 +John Doe 44 1000044 +John Doe 45 1000045 +John Doe 46 1000046 +John Doe 47 1000047 +John Doe 48 1000048 +John Doe 49 1000049 +''' + assert TSV(result) == TSV(expected) + @pytest.mark.timeout(30) def test_kafka_protobuf_no_delimiter(kafka_cluster): instance.query(''' @@ -2117,7 +2211,7 @@ def test_kafka_duplicates_when_commit_failed(kafka_cluster): kafka_format = 'JSONEachRow', kafka_max_block_size = 20, kafka_flush_interval_ms = 1000; - + SELECT * FROM test.kafka LIMIT 1; /* do subscription & assignment in advance (it can take different time, test rely on timing, so can flap otherwise) */ ''') From ad52fceb83440c977333f3d12aa1a08e7cdb5fdf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 
Nov 2020 20:34:14 +0300 Subject: [PATCH 47/92] Fix bad code in SmallObjectPool --- src/Common/SmallObjectPool.h | 56 ++++++++++++------------------------ 1 file changed, 19 insertions(+), 37 deletions(-) diff --git a/src/Common/SmallObjectPool.h b/src/Common/SmallObjectPool.h index 469c102bdae..b17b509f14c 100644 --- a/src/Common/SmallObjectPool.h +++ b/src/Common/SmallObjectPool.h @@ -1,76 +1,59 @@ #pragma once #include -#include -#include -#include -#include -#include +#include namespace DB { - /** Can allocate memory objects of fixed size with deletion support. - * For small `object_size`s allocated no less than getMinAllocationSize() bytes. */ + * For small `object_size`s allocated no less than pointer size. + */ class SmallObjectPool { private: - struct Block { Block * next; }; - static constexpr auto getMinAllocationSize() { return sizeof(Block); } - const size_t object_size; Arena pool; - Block * free_list{}; + char * free_list{}; public: - SmallObjectPool( - const size_t object_size_, const size_t initial_size = 4096, const size_t growth_factor = 2, - const size_t linear_growth_threshold = 128 * 1024 * 1024) - : object_size{std::max(object_size_, getMinAllocationSize())}, - pool{initial_size, growth_factor, linear_growth_threshold} + SmallObjectPool(size_t object_size_) + : object_size{std::max(object_size_, sizeof(char *))} { if (pool.size() < object_size) return; - const auto num_objects = pool.size() / object_size; - auto head = free_list = ext::bit_cast(pool.alloc(num_objects * object_size)); + const size_t num_objects = pool.size() / object_size; + free_list = pool.alloc(num_objects * object_size); + char * head = free_list; - for (const auto i : ext::range(0, num_objects - 1)) + for (size_t i = 0; i < num_objects - 1; ++i) { - (void) i; - head->next = ext::bit_cast(ext::bit_cast(head) + object_size); - head = head->next; + char * next = head + object_size; + unalignedStore(head, next); + head = next; } - head->next = nullptr; + unalignedStore(head, 
nullptr); } char * alloc() { if (free_list) { - const auto res = reinterpret_cast(free_list); - free_list = free_list->next; + char * res = free_list; + free_list = unalignedLoad(free_list); return res; } return pool.alloc(object_size); } - void free(const void * ptr) + void free(char * ptr) { - union - { - const void * p_v; - Block * block; - }; - - p_v = ptr; - block->next = free_list; - - free_list = block; + unalignedStore(ptr, free_list); + free_list = ptr; } /// The size of the allocated pool in bytes @@ -81,5 +64,4 @@ public: }; - } From c4398952da66d15eb7086bb2419605d6f9ef9e1e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 20:40:11 +0300 Subject: [PATCH 48/92] Addition to prev. revision --- src/Dictionaries/ComplexKeyCacheDictionary.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Dictionaries/ComplexKeyCacheDictionary.h b/src/Dictionaries/ComplexKeyCacheDictionary.h index e055d1a6b33..2663fee266d 100644 --- a/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include "DictionaryStructure.h" #include "IDictionary.h" From 57de99bcd994a317b53f4e2c5598bc9c2134a8f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 20:46:43 +0300 Subject: [PATCH 49/92] More simple --- src/Common/SmallObjectPool.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/src/Common/SmallObjectPool.h b/src/Common/SmallObjectPool.h index b17b509f14c..0eda8931946 100644 --- a/src/Common/SmallObjectPool.h +++ b/src/Common/SmallObjectPool.h @@ -15,27 +15,12 @@ class SmallObjectPool private: const size_t object_size; Arena pool; - char * free_list{}; + char * free_list = nullptr; public: SmallObjectPool(size_t object_size_) : object_size{std::max(object_size_, sizeof(char *))} { - if (pool.size() < object_size) - return; - - const size_t num_objects = pool.size() / object_size; - free_list = 
pool.alloc(num_objects * object_size); - char * head = free_list; - - for (size_t i = 0; i < num_objects - 1; ++i) - { - char * next = head + object_size; - unalignedStore(head, next); - head = next; - } - - unalignedStore(head, nullptr); } char * alloc() From 584f1a83be5288d0f474013633a03de9d1903f44 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 20:52:28 +0300 Subject: [PATCH 50/92] Do not instrument 3rd-party libraries with UBSan --- contrib/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 7d6b9c0e374..d15715c7b8f 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -14,6 +14,11 @@ unset (_current_dir_name) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -w") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -w") +if (SANITIZE STREQUAL "undefined") + # 3rd-party libraries usually not intended to work with UBSan. + add_compile_options(-fno-sanitize=undefined) +endif() + set_property(DIRECTORY PROPERTY EXCLUDE_FROM_ALL 1) add_subdirectory (boost-cmake) @@ -157,9 +162,6 @@ if(USE_INTERNAL_SNAPPY_LIBRARY) add_subdirectory(snappy) set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy") - if(SANITIZE STREQUAL "undefined") - target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined) - endif() endif() if (USE_INTERNAL_PARQUET_LIBRARY) From 8ec6653fb13762639cf8862f2f5505b7ac7ef81b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 20:56:21 +0300 Subject: [PATCH 51/92] Fix UBSan report in Poco --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index 757d947235b..f49c6ab8d3a 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 757d947235b307675cff964f29b19d388140a9eb +Subproject commit f49c6ab8d3aa71828bd1b411485c21722e8c9d82 From 17b3dff0c2745e788019143bf7fd134c54555bb1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 
2020 20:58:04 +0300 Subject: [PATCH 52/92] Whitespaces --- src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h | 2 ++ src/Processors/Sources/SinkToOutputStream.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h index 054aec94464..531b2636747 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h @@ -1,9 +1,11 @@ #pragma once + #include #include #include #include + namespace DB { diff --git a/src/Processors/Sources/SinkToOutputStream.cpp b/src/Processors/Sources/SinkToOutputStream.cpp index 9727b637d8b..7612ba10fb3 100644 --- a/src/Processors/Sources/SinkToOutputStream.cpp +++ b/src/Processors/Sources/SinkToOutputStream.cpp @@ -1,6 +1,7 @@ #include #include + namespace DB { From 28c6dea1a20d9c96ecb7ead1a09b30fd43d8719b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 21:02:16 +0300 Subject: [PATCH 53/92] LOGICAL_ERROR was mistakenly used for some ErrorCodes #12735 --- src/Interpreters/Context.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index befb097faf7..b909ce383a3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -65,6 +65,7 @@ #include #include + namespace ProfileEvents { extern const Event ContextLock; @@ -153,7 +154,7 @@ public: } else if (it->second->key.first != context.client_info.current_user) { - throw Exception("Session belongs to a different user", ErrorCodes::LOGICAL_ERROR); + throw Exception("Session belongs to a different user", ErrorCodes::SESSION_IS_LOCKED); } /// Use existing session. 
@@ -596,7 +597,8 @@ VolumePtr Context::setTemporaryStorage(const String & path, const String & polic { StoragePolicyPtr tmp_policy = getStoragePolicySelector(lock)->get(policy_name); if (tmp_policy->getVolumes().size() != 1) - throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception("Policy " + policy_name + " is used temporary files, such policy should have exactly one volume", + ErrorCodes::NO_ELEMENTS_IN_CONFIG); shared->tmp_volume = tmp_policy->getVolume(0); } @@ -1083,11 +1085,13 @@ String Context::getInitialQueryId() const void Context::setCurrentDatabaseNameInGlobalContext(const String & name) { if (global_context != this) - throw Exception("Cannot set current database for non global context, this method should be used during server initialization", ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot set current database for non global context, this method should be used during server initialization", + ErrorCodes::LOGICAL_ERROR); auto lock = getLock(); if (!current_database.empty()) - throw Exception("Default database name cannot be changed in global context without server restart", ErrorCodes::LOGICAL_ERROR); + throw Exception("Default database name cannot be changed in global context without server restart", + ErrorCodes::LOGICAL_ERROR); current_database = name; } @@ -1470,7 +1474,7 @@ DDLWorker & Context::getDDLWorker() const { auto lock = getLock(); if (!shared->ddl_worker) - throw Exception("DDL background thread is not initialized.", ErrorCodes::LOGICAL_ERROR); + throw Exception("DDL background thread is not initialized.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); return *shared->ddl_worker; } From ee46993359b2a9fb62443b6f35951c86e73f726a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 5 Nov 2020 23:37:17 +0300 Subject: [PATCH 54/92] Enable use_compact_format_in_distributed_parts_names by default --- src/Core/Settings.h | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4d4712dcba7..b099b9c6ab5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -376,7 +376,7 @@ class IColumn; M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ - M(Bool, use_compact_format_in_distributed_parts_names, false, "Changes format of directories names for distributed table insert parts.", 0) \ + M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descend parser).", 0) \ M(Seconds, temporary_live_view_timeout, DEFAULT_TEMPORARY_LIVE_VIEW_TIMEOUT_SEC, "Timeout after which temporary live view is deleted.", 0) \ From f379f9837d05d475433d8d9e634cec243ff4613f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 5 Nov 2020 23:58:06 +0300 Subject: [PATCH 55/92] Document use_compact_format_in_distributed_parts_names setting Since it has been enabled by default, at least it should be documented. 
--- docs/en/operations/settings/settings.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 63ff688e51a..d83f7d6c219 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1765,6 +1765,23 @@ Default value: `0`. - [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed) - [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed) + + +## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names} + +Uses compact format for storing blocks for async (`insert_distributed_sync`) INSERT into tables with `Distributed` engine. + +Possible values: + +- 0 — Uses `user[:password]@host:port#default_database` directory format. +- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format. + +Default value: `1`. + +!!! note "Note" + - with `use_compact_format_in_distributed_parts_names=0` changes from cluster definition will not be applied for async INSERT. + - with `use_compact_format_in_distributed_parts_names=1` changing the order of the nodes in the cluster definition, will change the `shard_index`/`replica_index` so be aware. + ## background_buffer_flush_schedule_pool_size {#background_buffer_flush_schedule_pool_size} Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session. From ac8df08186ac940d97cd0413649872ef6abecac3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 6 Nov 2020 08:34:37 +0300 Subject: [PATCH 56/92] Set use_compact_format_in_distributed_parts_names=0 for some integration tests To avoid depends from default. 
--- tests/integration/test_distributed_format/test.py | 14 +++++++++----- .../test_distributed_storage_configuration/test.py | 8 ++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_distributed_format/test.py b/tests/integration/test_distributed_format/test.py index 607154e37f1..22054077544 100644 --- a/tests/integration/test_distributed_format/test.py +++ b/tests/integration/test_distributed_format/test.py @@ -47,10 +47,12 @@ def test_single_file(started_cluster, cluster): def test_two_files(started_cluster, cluster): node.query( "create table test.distr_2 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) - node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", - settings={"use_compact_format_in_distributed_parts_names": "1"}) - node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", - settings={"use_compact_format_in_distributed_parts_names": "1"}) + node.query("insert into test.distr_2 values (0, '_'), (1, 'a')", settings={ + "use_compact_format_in_distributed_parts_names": "1", + }) + node.query("insert into test.distr_2 values (2, 'bb'), (3, 'ccc')", settings={ + "use_compact_format_in_distributed_parts_names": "1", + }) query = "select * from file('/var/lib/clickhouse/data/test/distr_2/shard1_replica1/{1,2,3,4}.bin', 'Distributed') order by x" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) @@ -70,7 +72,9 @@ def test_two_files(started_cluster, cluster): def test_single_file_old(started_cluster, cluster): node.query( "create table test.distr_3 (x UInt64, s String) engine = Distributed('{}', database, table)".format(cluster)) - node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')") + node.query("insert into test.distr_3 values (1, 'a'), (2, 'bb'), (3, 'ccc')", settings={ + "use_compact_format_in_distributed_parts_names": "0", + }) query = "select * from 
file('/var/lib/clickhouse/data/test/distr_3/default@not_existing:9000/1.bin', 'Distributed')" out = node.exec_in_container(['/usr/bin/clickhouse', 'local', '--stacktrace', '-q', query]) diff --git a/tests/integration/test_distributed_storage_configuration/test.py b/tests/integration/test_distributed_storage_configuration/test.py index d293b96399d..976fc5211a7 100644 --- a/tests/integration/test_distributed_storage_configuration/test.py +++ b/tests/integration/test_distributed_storage_configuration/test.py @@ -48,7 +48,9 @@ def test_insert(start_cluster): # manual only (but only for remote node) node.query('SYSTEM STOP DISTRIBUTED SENDS test.dist_foo') - node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)') + node.query('INSERT INTO test.dist_foo SELECT * FROM numbers(100)', settings={ + 'use_compact_format_in_distributed_parts_names': '0', + }) assert _files_in_dist_mon(node, 'disk1', 'dist_foo') == 1 assert _files_in_dist_mon(node, 'disk2', 'dist_foo') == 0 @@ -61,7 +63,9 @@ def test_insert(start_cluster): # node.query('RENAME TABLE test.dist_foo TO test.dist2_foo') - node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)') + node.query('INSERT INTO test.dist2_foo SELECT * FROM numbers(100)', settings={ + 'use_compact_format_in_distributed_parts_names': '0', + }) assert _files_in_dist_mon(node, 'disk1', 'dist2_foo') == 0 assert _files_in_dist_mon(node, 'disk2', 'dist2_foo') == 1 From c057e86040f9ffc4f972e29558eff6c2c10ae5ea Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 6 Nov 2020 21:04:50 +0300 Subject: [PATCH 57/92] Fix path masking in system.distribution_queue for multiple nodes --- .../System/StorageSystemDistributionQueue.cpp | 46 ++++++++++--------- .../test_cluster_with_incorrect_pw.xml | 9 +++- ...5_system_distribution_queue_mask.reference | 2 +- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/Storages/System/StorageSystemDistributionQueue.cpp b/src/Storages/System/StorageSystemDistributionQueue.cpp index 
786bd8bf967..c8d8c88ec08 100644 --- a/src/Storages/System/StorageSystemDistributionQueue.cpp +++ b/src/Storages/System/StorageSystemDistributionQueue.cpp @@ -46,32 +46,34 @@ std::string maskDataPath(const std::string & path) masked_path.pop_back(); - size_t dir_name_pos = masked_path.rfind('/'); - if (dir_name_pos == std::string::npos) + size_t node_pos = masked_path.rfind('/'); + /// Loop through each node, that separated with a comma + while (node_pos != std::string::npos) { - /// Do not include full path into the exception message since it may include password. - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid path format"); - } - ++dir_name_pos; + ++node_pos; - size_t user_pw_end = masked_path.find('@', dir_name_pos); - if (user_pw_end == std::string::npos) - { - /// Likey new format (use_compact_format_in_distributed_parts_names=1) - return path; - } + size_t user_pw_end = masked_path.find('@', node_pos); + if (user_pw_end == std::string::npos) + { + /// Likey new format (use_compact_format_in_distributed_parts_names=1) + return path; + } - size_t pw_start = masked_path.find(':', dir_name_pos); - if (pw_start > user_pw_end) - { - /// No password in path - return path; - } - ++pw_start; + size_t pw_start = masked_path.find(':', node_pos); + if (pw_start > user_pw_end) + { + /// No password in path + return path; + } + ++pw_start; - size_t pw_length = user_pw_end - pw_start; - /// Replace with a single '*' to hide even the password length. - masked_path.replace(pw_start, pw_length, 1, '*'); + size_t pw_length = user_pw_end - pw_start; + /// Replace with a single '*' to hide even the password length. + masked_path.replace(pw_start, pw_length, 1, '*'); + + /// "," cannot be in the node specification since it will be encoded in hex. 
+ node_pos = masked_path.find(',', node_pos); + } masked_path.push_back('/'); diff --git a/tests/config/config.d/test_cluster_with_incorrect_pw.xml b/tests/config/config.d/test_cluster_with_incorrect_pw.xml index 79d122f7b9b..109e35afc37 100644 --- a/tests/config/config.d/test_cluster_with_incorrect_pw.xml +++ b/tests/config/config.d/test_cluster_with_incorrect_pw.xml @@ -2,8 +2,15 @@ + true - localhost + 127.0.0.1 + 9000 + + foo + + + 127.0.0.2 9000 foo diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference index fda87e4d99e..bd0eac10816 100644 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference @@ -1,4 +1,4 @@ masked -2,"default:*@localhost:9000" +3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000" no masking 1,"default@localhost:9000" From c01177099eb11ed077954733800d4051cc86c2df Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 21:19:57 +0300 Subject: [PATCH 58/92] Better diagnostics on syntax error in CREATE TABLE query #12493 --- src/Parsers/ParserCreateQuery.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 3c20446eb15..e09bab30ff3 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -144,8 +144,8 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E /// doesn't check that parsed string is existing data type. In this way /// REMOVE keyword can be parsed as data type and further parsing will fail. /// So we just check this keyword and in case of success return column - /// column declaration with name only. - if (s_remove.checkWithoutMoving(pos, expected)) + /// declaration with name only. 
+ if (!require_type && s_remove.checkWithoutMoving(pos, expected)) { if (!check_keywords_after_name) return false; @@ -165,11 +165,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr codec_expression; ASTPtr ttl_expression; - if (!s_default.checkWithoutMoving(pos, expected) && - !s_materialized.checkWithoutMoving(pos, expected) && - !s_alias.checkWithoutMoving(pos, expected) && - !s_comment.checkWithoutMoving(pos, expected) && - !s_codec.checkWithoutMoving(pos, expected)) + if (require_type || + ( !s_default.checkWithoutMoving(pos, expected) + && !s_materialized.checkWithoutMoving(pos, expected) + && !s_alias.checkWithoutMoving(pos, expected) + && !s_comment.checkWithoutMoving(pos, expected) + && !s_codec.checkWithoutMoving(pos, expected))) { if (!type_parser.parse(pos, type, expected)) return false; From 57a0dd30741a55aa2d8d00650bb6bab988223039 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 21:23:30 +0300 Subject: [PATCH 59/92] Add a test --- .../0_stateless/01559_misplaced_codec_diagnostics.reference | 1 + .../0_stateless/01559_misplaced_codec_diagnostics.sh | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference create mode 100755 tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference new file mode 100644 index 00000000000..d5bdb816bf2 --- /dev/null +++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.reference @@ -0,0 +1 @@ +Unknown data type family: CODEC diff --git a/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh new file mode 100755 index 00000000000..9904b6388d6 --- /dev/null +++ b/tests/queries/0_stateless/01559_misplaced_codec_diagnostics.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env 
bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE t (c CODEC(NONE)) ENGINE = Memory" 2>&1 | grep -oF 'Unknown data type family: CODEC' | uniq From ab83170bac1a0c898b9a8b40105876be0909733c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 21:44:49 +0300 Subject: [PATCH 60/92] Remove a test --- .../0_stateless/01532_client_sni.reference | 1 - tests/queries/0_stateless/01532_client_sni.sh | 21 ------------------- 2 files changed, 22 deletions(-) delete mode 100644 tests/queries/0_stateless/01532_client_sni.reference delete mode 100755 tests/queries/0_stateless/01532_client_sni.sh diff --git a/tests/queries/0_stateless/01532_client_sni.reference b/tests/queries/0_stateless/01532_client_sni.reference deleted file mode 100644 index 879b5791c7b..00000000000 --- a/tests/queries/0_stateless/01532_client_sni.reference +++ /dev/null @@ -1 +0,0 @@ -0000000e000c0000096c6f63616c686f7374 diff --git a/tests/queries/0_stateless/01532_client_sni.sh b/tests/queries/0_stateless/01532_client_sni.sh deleted file mode 100755 index 0b122713fde..00000000000 --- a/tests/queries/0_stateless/01532_client_sni.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -. "$CURDIR"/../shell_config.sh - -# Check that ClickHouse properly uses SNI extension in Client Hello packet in HTTPS connection. 
- -nc -q0 -l 5678 | xxd -p | grep -oF $'0000000e000c0000096c6f63616c686f7374' & - -${CLICKHOUSE_LOCAL} --query "SELECT * FROM url('https://localhost:5678/', RawBLOB, 'data String')" 2>&1 | grep -v -F 'Timeout' - -# grep -oF '\x00\x00\x00\x0e\x00\x0c\x00\x00\x09\x6c\x6f\x63\x61\x6c\x68\x6f\x73\x74' -# ^^^^^^^^ ^^^^^^^ ^^^^^^^ ^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# | | | | | -# server name data | hostname | l o c a l h o s t -# extension id len: 14 | type | -# | | -# hostnames list hostname -# len, 12 len, 9 - -wait From 85ecc9ea104eb76581d4b3426a7609c3882ef8ad Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 6 Nov 2020 22:11:57 +0300 Subject: [PATCH 61/92] Update ASTColumnDeclaration.cpp --- src/Parsers/ASTColumnDeclaration.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Parsers/ASTColumnDeclaration.cpp b/src/Parsers/ASTColumnDeclaration.cpp index 0e0847713c2..27ece3e18c2 100644 --- a/src/Parsers/ASTColumnDeclaration.cpp +++ b/src/Parsers/ASTColumnDeclaration.cpp @@ -56,10 +56,10 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta { settings.ostr << ' '; - FormatStateStacked typeFrame = frame; - typeFrame.indent = 0; + FormatStateStacked type_frame = frame; + type_frame.indent = 0; - type->formatImpl(settings, state, typeFrame); + type->formatImpl(settings, state, type_frame); } if (null_modifier) From 7b7289436306088e2b5213dd29b00d86037a6925 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Nov 2020 23:23:08 +0300 Subject: [PATCH 62/92] Simplified code --- src/IO/ReadBufferFromPocoSocket.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index acd0414ef5d..5c66c3209f6 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -67,7 +67,7 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, bool 
ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) { - return offset() != buffer().size() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); } } From fd84d16387e507763a9dfa661397c385d7ab6931 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 03:14:53 +0300 Subject: [PATCH 63/92] Fix "server failed to start" error --- src/Access/DiskAccessStorage.cpp | 2 + src/Access/LDAPAccessStorage.cpp | 1 + src/Access/UsersConfigAccessStorage.cpp | 1 + .../AggregateFunctionGroupArray.h | 4 + src/AggregateFunctions/ReservoirSampler.h | 2 + src/Client/MultiplexedConnections.cpp | 1 + src/Columns/tests/gtest_weak_hash_32.cpp | 11 +- src/Common/Config/ConfigProcessor.cpp | 1 + src/Common/Exception.cpp | 2 + src/Common/HTMLForm.h | 2 + src/Common/MemoryTracker.cpp | 2 + src/Common/ShellCommand.cpp | 1 + src/Common/StackTrace.cpp | 4 + src/Common/StudentTTest.cpp | 2 + src/Common/ThreadProfileEvents.cpp | 1 + src/Common/ThreadStatus.cpp | 1 + src/Common/UInt128.h | 1 + src/Common/XDBCBridgeHelper.h | 1 + src/Common/ZooKeeper/TestKeeper.cpp | 1 + src/Common/checkStackSize.cpp | 1 + src/Common/parseGlobs.cpp | 4 + .../gtest_getMultipleValuesFromConfig.cpp | 2 + .../tests/gtest_sensitive_data_masker.cpp | 8 + src/Compression/tests/compressed_buffer.cpp | 1 + src/Core/MySQL/IMySQLReadPacket.cpp | 1 + src/Core/MySQL/IMySQLWritePacket.cpp | 1 + src/Core/SortDescription.h | 1 + .../CheckConstraintsBlockOutputStream.cpp | 2 + src/DataStreams/IBlockInputStream.cpp | 1 + .../DataTypeCustomSimpleAggregateFunction.cpp | 1 + src/DataTypes/DataTypesDecimal.cpp | 1 + .../tests/gtest_data_type_get_common_type.cpp | 1 + src/Databases/DatabaseOnDisk.cpp | 2 + .../MySQL/MaterializeMySQLSyncThread.cpp | 5 + src/Dictionaries/DictionaryStructure.cpp | 1 + .../tests/gtest_dictionary_configuration.cpp | 1 
+ src/Functions/abtesting.cpp | 42 ++- src/IO/HTTPCommon.cpp | 1 + src/IO/MySQLPacketPayloadReadBuffer.cpp | 14 +- src/IO/ReadWriteBufferFromHTTP.h | 5 +- src/IO/S3/PocoHTTPClient.cpp | 1 + src/IO/tests/gtest_bit_io.cpp | 1 + src/IO/tests/hashing_read_buffer.cpp | 1 + src/IO/tests/limit_read_buffer2.cpp | 1 + src/IO/tests/write_buffer.cpp | 1 + .../ClusterProxy/SelectStreamFactory.cpp | 1 + src/Interpreters/Context.cpp | 1 + src/Interpreters/ExpressionActions.cpp | 4 + src/Interpreters/InterpreterCreateQuery.cpp | 2 + src/Interpreters/InterpreterExplainQuery.cpp | 1 + .../InterpreterShowAccessQuery.cpp | 1 + ...InterpreterShowCreateAccessEntityQuery.cpp | 2 + .../InterpreterShowCreateQuery.cpp | 1 + .../InterpreterShowGrantsQuery.cpp | 2 + .../InterpreterShowTablesQuery.cpp | 4 + src/Interpreters/QueryAliasesVisitor.cpp | 1 + src/Interpreters/Set.cpp | 1 + src/Interpreters/TreeRewriter.cpp | 1 + src/Interpreters/executeQuery.cpp | 1 + src/Interpreters/tests/CMakeLists.txt | 3 - src/Interpreters/tests/users.cpp | 282 ------------------ src/Parsers/ASTCreateRowPolicyQuery.cpp | 1 + src/Parsers/DumpASTNode.h | 1 + src/Parsers/IAST.cpp | 1 + src/Parsers/IAST.h | 1 + src/Parsers/formatAST.cpp | 1 + src/Parsers/queryToString.cpp | 1 + src/Parsers/tests/gtest_dictionary_parser.cpp | 1 + .../Formats/Impl/AvroRowInputFormat.cpp | 1 + .../Formats/Impl/PrettyBlockOutputFormat.cpp | 5 + .../Impl/PrettyCompactBlockOutputFormat.cpp | 1 + .../Algorithms/CollapsingSortedAlgorithm.cpp | 1 + src/Server/ReplicasStatusHandler.cpp | 1 + src/Server/TCPHandler.cpp | 1 + .../DistributedBlockOutputStream.cpp | 1 + src/Storages/Kafka/StorageKafka.cpp | 2 + src/Storages/MergeTree/KeyCondition.cpp | 2 + src/Storages/MergeTree/MergeTreeData.cpp | 3 + .../MergeTree/MergeTreeDataPartChecksum.cpp | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1 + .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 + .../ReplicatedMergeTreeTableMetadata.cpp | 1 + src/Storages/MutationCommands.cpp | 1 + 
src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 1 + src/Storages/StorageDictionary.cpp | 1 + src/Storages/StorageDistributed.cpp | 2 + src/Storages/StorageInMemoryMetadata.cpp | 1 + src/Storages/StorageMergeTree.cpp | 3 + src/Storages/StorageReplicatedMergeTree.cpp | 5 + src/Storages/StorageS3.cpp | 1 + src/Storages/System/StorageSystemUsers.cpp | 1 + src/Storages/tests/gtest_storage_log.cpp | 1 + .../transformQueryForExternalDatabase.cpp | 1 + src/TableFunctions/TableFunctionRemote.cpp | 1 + 94 files changed, 173 insertions(+), 326 deletions(-) delete mode 100644 src/Interpreters/tests/users.cpp diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 0c7425327ad..abf4ff12d5a 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -198,6 +198,7 @@ namespace /// Serialize the list of ATTACH queries to a string. std::stringstream ss; + ss.exceptions(std::ios::failbit); for (const ASTPtr & query : queries) ss << *query << ";\n"; String file_contents = std::move(ss).str(); @@ -353,6 +354,7 @@ String DiskAccessStorage::getStorageParamsJSON() const if (readonly) json.set("readonly", readonly.load()); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); return oss.str(); } diff --git a/src/Access/LDAPAccessStorage.cpp b/src/Access/LDAPAccessStorage.cpp index b20ef3a39a9..c9f00f2f4ab 100644 --- a/src/Access/LDAPAccessStorage.cpp +++ b/src/Access/LDAPAccessStorage.cpp @@ -151,6 +151,7 @@ String LDAPAccessStorage::getStorageParamsJSON() const params_json.set("roles", default_role_names); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(params_json, oss); return oss.str(); diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index ce10ebf0bcc..ba3c2d5b8e5 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -461,6 +461,7 @@ 
String UsersConfigAccessStorage::getStorageParamsJSON() const if (!path.empty()) json.set("path", path); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(json, oss); return oss.str(); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 02b9003eb96..ba9b235de07 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -245,6 +245,7 @@ public: { DB::writeIntBinary(this->data(place).total_values, buf); std::ostringstream rng_stream; + rng_stream.exceptions(std::ios::failbit); rng_stream << this->data(place).rng; DB::writeStringBinary(rng_stream.str(), buf); } @@ -275,6 +276,7 @@ public: std::string rng_string; DB::readStringBinary(rng_string, buf); std::istringstream rng_stream(rng_string); + rng_stream.exceptions(std::ios::failbit); rng_stream >> this->data(place).rng; } @@ -564,6 +566,7 @@ public: { DB::writeIntBinary(data(place).total_values, buf); std::ostringstream rng_stream; + rng_stream.exceptions(std::ios::failbit); rng_stream << data(place).rng; DB::writeStringBinary(rng_stream.str(), buf); } @@ -598,6 +601,7 @@ public: std::string rng_string; DB::readStringBinary(rng_string, buf); std::istringstream rng_stream(rng_string); + rng_stream.exceptions(std::ios::failbit); rng_stream >> data(place).rng; } diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index bbb8172b209..f82b0b856a0 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -191,6 +191,7 @@ public: std::string rng_string; DB::readStringBinary(rng_string, buf); std::istringstream rng_stream(rng_string); + rng_stream.exceptions(std::ios::failbit); rng_stream >> rng; for (size_t i = 0; i < samples.size(); ++i) @@ -205,6 +206,7 @@ public: DB::writeIntBinary(total_values, buf); std::ostringstream rng_stream; + 
rng_stream.exceptions(std::ios::failbit); rng_stream << rng; DB::writeStringBinary(rng_stream.str(), buf); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 282aaabd119..b6cb55fa979 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -223,6 +223,7 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const { bool is_first = true; std::ostringstream os; + os.exceptions(std::ios::failbit); for (const ReplicaState & state : replica_states) { const Connection * connection = state.connection; diff --git a/src/Columns/tests/gtest_weak_hash_32.cpp b/src/Columns/tests/gtest_weak_hash_32.cpp index ea4c0db33e9..a7fd60a3b9c 100644 --- a/src/Columns/tests/gtest_weak_hash_32.cpp +++ b/src/Columns/tests/gtest_weak_hash_32.cpp @@ -71,7 +71,8 @@ void checkColumn( std::unordered_map map; size_t num_collisions = 0; - std::stringstream collitions_str; + std::stringstream collisions_str; + collisions_str.exceptions(std::ios::failbit); for (size_t i = 0; i < eq_class.size(); ++i) { @@ -86,14 +87,14 @@ void checkColumn( if (num_collisions <= max_collisions_to_print) { - collitions_str << "Collision:\n"; - collitions_str << print_for_row(it->second) << '\n'; - collitions_str << print_for_row(i) << std::endl; + collisions_str << "Collision:\n"; + collisions_str << print_for_row(it->second) << '\n'; + collisions_str << print_for_row(i) << std::endl; } if (num_collisions > allowed_collisions) { - std::cerr << collitions_str.rdbuf(); + std::cerr << collisions_str.rdbuf(); break; } } diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index d2213a01498..8a6093c47c9 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -538,6 +538,7 @@ XMLDocumentPtr ConfigProcessor::processConfig( *has_zk_includes = !contributing_zk_paths.empty(); std::stringstream comment; + comment.exceptions(std::ios::failbit); comment << " 
This file was generated automatically.\n"; comment << " Do not edit it: it is likely to be discarded and generated again before it's read next time.\n"; comment << " Files used to generate this file:"; diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index 0214fa7b065..820e3857bfc 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -246,6 +246,7 @@ static std::string getExtraExceptionInfo(const std::exception & e) std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace /*= false*/, bool with_extra_info /*= true*/) { std::stringstream stream; + stream.exceptions(std::ios::failbit); try { @@ -365,6 +366,7 @@ void tryLogException(std::exception_ptr e, Poco::Logger * logger, const std::str std::string getExceptionMessage(const Exception & e, bool with_stacktrace, bool check_embedded_stacktrace) { std::stringstream stream; + stream.exceptions(std::ios::failbit); try { diff --git a/src/Common/HTMLForm.h b/src/Common/HTMLForm.h index 2490d613160..f9b5cc0c520 100644 --- a/src/Common/HTMLForm.h +++ b/src/Common/HTMLForm.h @@ -17,12 +17,14 @@ struct HTMLForm : public Poco::Net::HTMLForm { Poco::URI uri(request.getURI()); std::istringstream istr(uri.getRawQuery()); + istr.exceptions(std::ios::failbit); readUrl(istr); } HTMLForm(const Poco::URI & uri) { std::istringstream istr(uri.getRawQuery()); + istr.exceptions(std::ios::failbit); readUrl(istr); } diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 380fcb1b2b6..5257f95898a 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -134,6 +134,7 @@ void MemoryTracker::alloc(Int64 size) ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded); std::stringstream message; + message.exceptions(std::ios::failbit); message << "Memory tracker"; if (const auto * description = description_ptr.load(std::memory_order_relaxed)) message << " " << description; @@ -166,6 +167,7 @@ void MemoryTracker::alloc(Int64 
size) ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded); std::stringstream message; + message.exceptions(std::ios::failbit); message << "Memory limit"; if (const auto * description = description_ptr.load(std::memory_order_relaxed)) message << " " << description; diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index 992419adb6d..db0928ea605 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -74,6 +74,7 @@ ShellCommand::~ShellCommand() void ShellCommand::logCommand(const char * filename, char * const argv[]) { std::stringstream args; + args.exceptions(std::ios::failbit); for (int i = 0; argv != nullptr && argv[i] != nullptr; ++i) { if (i > 0) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 677af269ca0..7e9474ac3a7 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -24,6 +24,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context) { std::stringstream error; + error.exceptions(std::ios::failbit); switch (sig) { case SIGSEGV: @@ -319,6 +320,7 @@ static void toStringEveryLineImpl( std::unordered_map dwarfs; std::stringstream out; + out.exceptions(std::ios::failbit); for (size_t i = offset; i < size; ++i) { @@ -358,6 +360,7 @@ static void toStringEveryLineImpl( } #else std::stringstream out; + out.exceptions(std::ios::failbit); for (size_t i = offset; i < size; ++i) { @@ -373,6 +376,7 @@ static void toStringEveryLineImpl( static std::string toStringImpl(const StackTrace::FramePointers & frame_pointers, size_t offset, size_t size) { std::stringstream out; + out.exceptions(std::ios::failbit); toStringEveryLineImpl(frame_pointers, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); } diff --git a/src/Common/StudentTTest.cpp b/src/Common/StudentTTest.cpp index 170f06c2877..fe605092acc 100644 --- a/src/Common/StudentTTest.cpp +++ b/src/Common/StudentTTest.cpp @@ -154,6 +154,8 @@ std::pair 
StudentTTest::compareAndReport(size_t confidence_le double mean_confidence_interval = table_value * t_statistic; std::stringstream ss; + ss.exceptions(std::ios::failbit); + if (mean_difference > mean_confidence_interval && (mean_difference - mean_confidence_interval > 0.0001)) /// difference must be more than 0.0001, to take into account connection latency. { ss << "Difference at " << confidence_level[confidence_level_index] << "% confidence : "; diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index ec9f47aebb1..cb519c9b928 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -415,6 +415,7 @@ std::vector PerfEventsCounters::eventIndicesFromString(const std::string } std::istringstream iss(events_list); + iss.exceptions(std::ios::failbit); std::string event_name; while (std::getline(iss, event_name, ',')) { diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index bac0559fc6b..f5ad28f57af 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -80,6 +80,7 @@ void ThreadStatus::assertState(const std::initializer_list & permitted_stat } std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Unexpected thread state " << getCurrentState(); if (description) ss << ": " << description; diff --git a/src/Common/UInt128.h b/src/Common/UInt128.h index 3944d8073c2..7b6f8e7c7be 100644 --- a/src/Common/UInt128.h +++ b/src/Common/UInt128.h @@ -49,6 +49,7 @@ struct UInt128 String toHexString() const { std::ostringstream os; + os.exceptions(std::ios::failbit); os << std::setw(16) << std::setfill('0') << std::hex << high << low; return String(os.str()); } diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index a425cd36b11..c794d2fe3cd 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -308,6 +308,7 @@ struct ODBCBridgeMixin path.setFileName("clickhouse-odbc-bridge"); std::stringstream command; + 
command.exceptions(std::ios::failbit); #if !CLICKHOUSE_SPLIT_BINARY cmd_args.push_back("odbc-bridge"); diff --git a/src/Common/ZooKeeper/TestKeeper.cpp b/src/Common/ZooKeeper/TestKeeper.cpp index 4f7beadef5f..f7db8a85f96 100644 --- a/src/Common/ZooKeeper/TestKeeper.cpp +++ b/src/Common/ZooKeeper/TestKeeper.cpp @@ -219,6 +219,7 @@ std::pair TestKeeperCreateRequest::process(TestKeeper::Contai ++it->second.seq_num; std::stringstream seq_num_str; + seq_num_str.exceptions(std::ios::failbit); seq_num_str << std::setw(10) << std::setfill('0') << seq_num; path_created += seq_num_str.str(); diff --git a/src/Common/checkStackSize.cpp b/src/Common/checkStackSize.cpp index 10e93a8356c..bdc117eccac 100644 --- a/src/Common/checkStackSize.cpp +++ b/src/Common/checkStackSize.cpp @@ -81,6 +81,7 @@ __attribute__((__weak__)) void checkStackSize() if (stack_size * 2 > max_stack_size) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Stack size too large" << ". Stack address: " << stack_address << ", frame address: " << frame_address diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index c07664c4662..19a9e9d50b6 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -20,6 +20,7 @@ namespace DB std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_globs) { std::ostringstream oss_for_escaping; + oss_for_escaping.exceptions(std::ios::failbit); /// Escaping only characters that not used in glob syntax for (const auto & letter : initial_str_with_globs) { @@ -33,6 +34,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob re2::StringPiece input(escaped_with_globs); re2::StringPiece matched; std::ostringstream oss_for_replacing; + oss_for_replacing.exceptions(std::ios::failbit); size_t current_index = 0; while (RE2::FindAndConsume(&input, enum_or_range, &matched)) { @@ -45,6 +47,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob size_t 
range_end = 0; char point; std::istringstream iss_range(buffer); + iss_range.exceptions(std::ios::failbit); iss_range >> range_begin >> point >> point >> range_end; assert(!iss_range.fail()); bool leading_zeros = buffer[0] == '0'; @@ -71,6 +74,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob oss_for_replacing << escaped_with_globs.substr(current_index); std::string almost_res = oss_for_replacing.str(); std::ostringstream oss_final_processing; + oss_final_processing.exceptions(std::ios::failbit); for (const auto & letter : almost_res) { if ((letter == '?') || (letter == '*')) diff --git a/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp b/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp index 4756043acbf..743091e7276 100644 --- a/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp +++ b/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp @@ -19,6 +19,8 @@ TEST(Common, getMultipleValuesFromConfig) )END"); + xml_isteam.exceptions(std::ios::failbit); + Poco::AutoPtr config = new Poco::Util::XMLConfiguration(xml_isteam); std::vector answer = getMultipleValuesFromConfig(*config, "first_level", "second_level"); std::vector right_answer = {"0", "1", "2", "3"}; diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index d5133b2ef95..43d7b9499a6 100644 --- a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -135,6 +135,8 @@ TEST(Common, SensitiveDataMasker) )END"); + xml_isteam.exceptions(std::ios::failbit); + Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam); DB::SensitiveDataMasker masker_xml_based(*xml_config, "query_masking_rules"); std::string top_secret = "The e-mail of IVAN PETROV is kotik1902@sdsdf.test, and the password is qwerty123"; @@ -165,6 +167,8 @@ TEST(Common, SensitiveDataMasker) )END"); + + xml_isteam_bad.exceptions(std::ios::failbit); Poco::AutoPtr xml_config = new 
Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); @@ -187,6 +191,8 @@ TEST(Common, SensitiveDataMasker) )END"); + xml_isteam_bad.exceptions(std::ios::failbit); + Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); @@ -209,6 +215,8 @@ TEST(Common, SensitiveDataMasker) )END"); + xml_isteam_bad.exceptions(std::ios::failbit); + Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); diff --git a/src/Compression/tests/compressed_buffer.cpp b/src/Compression/tests/compressed_buffer.cpp index 89bfe0d0cce..c018fc95995 100644 --- a/src/Compression/tests/compressed_buffer.cpp +++ b/src/Compression/tests/compressed_buffer.cpp @@ -52,6 +52,7 @@ int main(int, char **) if (x != i) { std::stringstream s; + s.exceptions(std::ios::failbit); s << "Failed!, read: " << x << ", expected: " << i; throw DB::Exception(s.str(), 0); } diff --git a/src/Core/MySQL/IMySQLReadPacket.cpp b/src/Core/MySQL/IMySQLReadPacket.cpp index 5f6bbc7bceb..676f3986ba4 100644 --- a/src/Core/MySQL/IMySQLReadPacket.cpp +++ b/src/Core/MySQL/IMySQLReadPacket.cpp @@ -22,6 +22,7 @@ void IMySQLReadPacket::readPayload(ReadBuffer & in, uint8_t & sequence_id) if (!payload.eof()) { std::stringstream tmp; + tmp.exceptions(std::ios::failbit); tmp << "Packet payload is not fully read. 
Stopped after " << payload.count() << " bytes, while " << payload.available() << " bytes are in buffer."; throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); } diff --git a/src/Core/MySQL/IMySQLWritePacket.cpp b/src/Core/MySQL/IMySQLWritePacket.cpp index f5bc339b079..3e97800177c 100644 --- a/src/Core/MySQL/IMySQLWritePacket.cpp +++ b/src/Core/MySQL/IMySQLWritePacket.cpp @@ -16,6 +16,7 @@ void IMySQLWritePacket::writePayload(WriteBuffer & buffer, uint8_t & sequence_id if (buf.remainingPayloadSize()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Incomplete payload. Written " << getPayloadSize() - buf.remainingPayloadSize() << " bytes, expected " << getPayloadSize() << " bytes."; throw Exception(ss.str(), 0); } diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index bd3b7bc45ff..98229bb73d7 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -61,6 +61,7 @@ struct SortColumnDescription std::string dump() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << column_name << ":" << column_number << ":dir " << direction << "nulls " << nulls_direction; return ss.str(); } diff --git a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp index 7a67074dbdf..d47d82689de 100644 --- a/src/DataStreams/CheckConstraintsBlockOutputStream.cpp +++ b/src/DataStreams/CheckConstraintsBlockOutputStream.cpp @@ -60,6 +60,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) if (!value) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Constraint " << backQuote(constraint_ptr->name) << " for table " << table_id.getNameForLogs() @@ -87,6 +88,7 @@ void CheckConstraintsBlockOutputStream::write(const Block & block) Names related_columns = constraint_expr->getRequiredColumns(); std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); 
exception_message << "Constraint " << backQuote(constraint_ptr->name) << " for table " << table_id.getNameForLogs() diff --git a/src/DataStreams/IBlockInputStream.cpp b/src/DataStreams/IBlockInputStream.cpp index e954225fdf9..23ba9ff2970 100644 --- a/src/DataStreams/IBlockInputStream.cpp +++ b/src/DataStreams/IBlockInputStream.cpp @@ -360,6 +360,7 @@ Block IBlockInputStream::getExtremes() String IBlockInputStream::getTreeID() const { std::stringstream s; + s.exceptions(std::ios::failbit); s << getName(); if (!children.empty()) diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index 2583f4f2753..3fe17a4bbfc 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -33,6 +33,7 @@ static const std::vector supported_functions{"any", "anyLast", "min", String DataTypeCustomSimpleAggregateFunction::getName() const { std::stringstream stream; + stream.exceptions(std::ios::failbit); stream << "SimpleAggregateFunction(" << function->getName(); if (!parameters.empty()) diff --git a/src/DataTypes/DataTypesDecimal.cpp b/src/DataTypes/DataTypesDecimal.cpp index 5aefd39fb16..bd4329f6f58 100644 --- a/src/DataTypes/DataTypesDecimal.cpp +++ b/src/DataTypes/DataTypesDecimal.cpp @@ -30,6 +30,7 @@ template std::string DataTypeDecimal::doGetName() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Decimal(" << this->precision << ", " << this->scale << ")"; return ss.str(); } diff --git a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index 8212555e8bc..5256d49ba6d 100644 --- a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp +++ b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -27,6 +27,7 @@ static auto typeFromString(const std::string & str) static auto typesFromString(const std::string & str) { std::istringstream 
data_types_stream(str); + data_types_stream.exceptions(std::ios::failbit); DataTypes data_types; std::string data_type; while (data_types_stream >> data_type) diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 4d7fcd4e248..83e70a25f87 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -95,6 +95,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) if (!create) { std::ostringstream query_stream; + query_stream.exceptions(std::ios::failbit); formatAST(*query, query_stream, true); throw Exception("Query '" + query_stream.str() + "' is not CREATE query", ErrorCodes::LOGICAL_ERROR); } @@ -121,6 +122,7 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) create->table = TABLE_WITH_UUID_NAME_PLACEHOLDER; std::ostringstream statement_stream; + statement_stream.exceptions(std::ios::failbit); formatAST(*create, statement_stream, false); statement_stream << '\n'; return statement_stream.str(); diff --git a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp index e26f5c2fd52..d86462a41bc 100644 --- a/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializeMySQLSyncThread.cpp @@ -128,6 +128,7 @@ static String checkVariableAndGetVersion(const mysqlxx::Pool::Entry & connection bool first = true; std::stringstream error_message; + error_message.exceptions(std::ios::failbit); error_message << "Illegal MySQL variables, the MaterializeMySQL engine requires "; for (const auto & [variable_name, variable_error_message] : variables_error_message) { @@ -239,6 +240,7 @@ static inline BlockOutputStreamPtr getTableOutput(const String & database_name, const StoragePtr & storage = DatabaseCatalog::instance().getTable(StorageID(database_name, table_name), query_context); std::stringstream insert_columns_str; + insert_columns_str.exceptions(std::ios::failbit); const StorageInMemoryMetadata & storage_metadata = 
storage->getInMemoryMetadata(); const ColumnsDescription & storage_columns = storage_metadata.getColumns(); const NamesAndTypesList & insert_columns_names = insert_materialized ? storage_columns.getAllPhysical() : storage_columns.getOrdinary(); @@ -330,6 +332,7 @@ std::optional MaterializeMySQLSyncThread::prepareSynchroniz const auto & position_message = [&]() { std::stringstream ss; + ss.exceptions(std::ios::failbit); position.dump(ss); return ss.str(); }; @@ -372,6 +375,7 @@ void MaterializeMySQLSyncThread::flushBuffersData(Buffers & buffers, Materialize const auto & position_message = [&]() { std::stringstream ss; + ss.exceptions(std::ios::failbit); client.getPosition().dump(ss); return ss.str(); }; @@ -643,6 +647,7 @@ void MaterializeMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPtr const auto & dump_event_message = [&]() { std::stringstream ss; + ss.exceptions(std::ios::failbit); receive_event->dump(ss); return ss.str(); }; diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index fca56442e6f..4c7cc5b4118 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -231,6 +231,7 @@ std::string DictionaryStructure::getKeyDescription() const return "UInt64"; std::ostringstream out; + out.exceptions(std::ios::failbit); out << '('; diff --git a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index 453ce2b81f0..62422124bd8 100644 --- a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -19,6 +19,7 @@ static std::string configurationToString(const DictionaryConfigurationPtr & conf { const Poco::Util::XMLConfiguration * xml_config = dynamic_cast(config.get()); std::ostringstream oss; + oss.exceptions(std::ios::failbit); xml_config->save(oss); return oss.str(); } diff --git a/src/Functions/abtesting.cpp 
b/src/Functions/abtesting.cpp index c57119d1c34..25c0abbdded 100644 --- a/src/Functions/abtesting.cpp +++ b/src/Functions/abtesting.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include #define STATS_ENABLE_STDVEC_WRAPPERS #include @@ -139,31 +139,29 @@ Variants bayesian_ab_test(String distribution, PODArray & xs, PODArray< String convertToJson(const PODArray & variant_names, const Variants & variants) { FormatSettings settings; - std::stringstream s; + WriteBufferOwnString buf; + + writeCString("{\"data\":[", buf); + for (size_t i = 0; i < variants.size(); ++i) { - WriteBufferFromOStream buf(s); - - writeCString("{\"data\":[", buf); - for (size_t i = 0; i < variants.size(); ++i) - { - writeCString("{\"variant_name\":", buf); - writeJSONString(variant_names[i], buf, settings); - writeCString(",\"x\":", buf); - writeText(variants[i].x, buf); - writeCString(",\"y\":", buf); - writeText(variants[i].y, buf); - writeCString(",\"beats_control\":", buf); - writeText(variants[i].beats_control, buf); - writeCString(",\"to_be_best\":", buf); - writeText(variants[i].best, buf); - writeCString("}", buf); - if (i != variant_names.size() -1) writeCString(",", buf); - } - writeCString("]}", buf); + writeCString("{\"variant_name\":", buf); + writeJSONString(variant_names[i], buf, settings); + writeCString(",\"x\":", buf); + writeText(variants[i].x, buf); + writeCString(",\"y\":", buf); + writeText(variants[i].y, buf); + writeCString(",\"beats_control\":", buf); + writeText(variants[i].beats_control, buf); + writeCString(",\"to_be_best\":", buf); + writeText(variants[i].best, buf); + writeCString("}", buf); + if (i != variant_names.size() -1) + writeCString(",", buf); } + writeCString("]}", buf); - return s.str(); + return buf.str(); } class FunctionBayesAB : public IFunction diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index bda615edcd5..04fec145775 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -240,6 +240,7 @@ void 
assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR if (!(status == Poco::Net::HTTPResponse::HTTP_OK || (isRedirect(status) && allow_redirects))) { std::stringstream error_message; + error_message.exceptions(std::ios::failbit); error_message << "Received error from remote server " << request.getURI() << ". HTTP status code: " << status << " " << response.getReason() << ", body: " << istr.rdbuf(); diff --git a/src/IO/MySQLPacketPayloadReadBuffer.cpp b/src/IO/MySQLPacketPayloadReadBuffer.cpp index 16b1cd5de19..f6f899e0ac7 100644 --- a/src/IO/MySQLPacketPayloadReadBuffer.cpp +++ b/src/IO/MySQLPacketPayloadReadBuffer.cpp @@ -27,20 +27,14 @@ bool MySQLPacketPayloadReadBuffer::nextImpl() in.readStrict(reinterpret_cast(&payload_length), 3); if (payload_length > MAX_PACKET_LENGTH) - { - std::ostringstream tmp; - tmp << "Received packet with payload larger than max_packet_size: " << payload_length; - throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); - } + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, + "Received packet with payload larger than max_packet_size: {}", payload_length); size_t packet_sequence_id = 0; in.read(reinterpret_cast(packet_sequence_id)); if (packet_sequence_id != sequence_id) - { - std::ostringstream tmp; - tmp << "Received packet with wrong sequence-id: " << packet_sequence_id << ". Expected: " << static_cast(sequence_id) << '.'; - throw Exception(tmp.str(), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); - } + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, + "Received packet with wrong sequence-id: {}. 
Expected: {}.", packet_sequence_id, static_cast(sequence_id)); sequence_id++; if (payload_length == 0) diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index ee6fcc58ab0..267800d8900 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -72,10 +72,7 @@ public: } else { - std::stringstream error_message; - error_message << "Too many redirects while trying to access " << initial_uri.toString(); - - throw Exception(error_message.str(), ErrorCodes::TOO_MANY_REDIRECTS); + throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString()); } } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index a649b76730b..49ccb6dc1b3 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -248,6 +248,7 @@ void PocoHTTPClient::makeRequestInternal( response->SetContentType(poco_response.getContentType()); std::stringstream headers_ss; + headers_ss.exceptions(std::ios::failbit); for (const auto & [header_name, header_value] : poco_response) { response->AddHeader(header_name, header_value); diff --git a/src/IO/tests/gtest_bit_io.cpp b/src/IO/tests/gtest_bit_io.cpp index f75abf92f30..dce146eaad7 100644 --- a/src/IO/tests/gtest_bit_io.cpp +++ b/src/IO/tests/gtest_bit_io.cpp @@ -77,6 +77,7 @@ std::string dumpContents(const T& container, { std::stringstream sstr; + sstr.exceptions(std::ios::failbit); dumpBuffer(std::begin(container), std::end(container), &sstr, col_sep, row_sep, cols_in_row); return sstr.str(); diff --git a/src/IO/tests/hashing_read_buffer.cpp b/src/IO/tests/hashing_read_buffer.cpp index dbec6b2374b..a1140160c32 100644 --- a/src/IO/tests/hashing_read_buffer.cpp +++ b/src/IO/tests/hashing_read_buffer.cpp @@ -23,6 +23,7 @@ static void test(size_t data_size) { std::cout << "block size " << read_buffer_block_size << std::endl; std::stringstream io; + io.exceptions(std::ios::failbit); DB::WriteBufferFromOStream 
out_impl(io); DB::HashingWriteBuffer out(out_impl); out.write(data, data_size); diff --git a/src/IO/tests/limit_read_buffer2.cpp b/src/IO/tests/limit_read_buffer2.cpp index 826fb048a0c..416eae0966b 100644 --- a/src/IO/tests/limit_read_buffer2.cpp +++ b/src/IO/tests/limit_read_buffer2.cpp @@ -21,6 +21,7 @@ try using namespace DB; std::stringstream s; + s.exceptions(std::ios::failbit); { std::string src = "1"; diff --git a/src/IO/tests/write_buffer.cpp b/src/IO/tests/write_buffer.cpp index 14beb6d0539..c0e9150d372 100644 --- a/src/IO/tests/write_buffer.cpp +++ b/src/IO/tests/write_buffer.cpp @@ -17,6 +17,7 @@ int main(int, char **) DB::String d = "'xyz\\"; std::stringstream s; + s.exceptions(std::ios::failbit); { DB::WriteBufferFromOStream out(s); diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 7d99cbd1d43..9e64695d1a0 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -107,6 +107,7 @@ String formattedAST(const ASTPtr & ast) if (!ast) return {}; std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*ast, ss, false, true); return ss.str(); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index befb097faf7..fb6e218b33b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1962,6 +1962,7 @@ void Context::checkCanBeDropped(const String & database, const String & table, c String size_str = formatReadableSizeWithDecimalSuffix(size); String max_size_to_drop_str = formatReadableSizeWithDecimalSuffix(max_size_to_drop); std::stringstream ostr; + ostr.exceptions(std::ios::failbit); ostr << "Table or Partition in " << backQuoteIfNeed(database) << "." 
<< backQuoteIfNeed(table) << " was not dropped.\n" << "Reason:\n" diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 762ad6ae575..75872553ec3 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -454,6 +454,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const std::string ExpressionAction::toString() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); switch (type) { case ADD_COLUMN: @@ -550,6 +551,7 @@ void ExpressionActions::checkLimits(Block & block) const if (non_const_columns > settings.max_temporary_non_const_columns) { std::stringstream list_of_non_const_columns; + list_of_non_const_columns.exceptions(std::ios::failbit); for (size_t i = 0, size = block.columns(); i < size; ++i) if (block.safeGetByPosition(i).column && !isColumnConst(*block.safeGetByPosition(i).column)) list_of_non_const_columns << "\n" << block.safeGetByPosition(i).name; @@ -921,6 +923,7 @@ void ExpressionActions::finalize(const Names & output_columns) std::string ExpressionActions::dumpActions() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "input:\n"; for (const auto & input_column : input_columns) @@ -1342,6 +1345,7 @@ void ExpressionActionsChain::finalize() std::string ExpressionActionsChain::dumpChain() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); for (size_t i = 0; i < steps.size(); ++i) { diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 6a8bdbea1ec..286d5269a64 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -136,6 +136,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { /// Currently, there are no database engines, that support any arguments. 
std::stringstream ostr; + ostr.exceptions(std::ios::failbit); formatAST(*create.storage, ostr, false, false); throw Exception("Unknown database engine: " + ostr.str(), ErrorCodes::UNKNOWN_DATABASE_ENGINE); } @@ -182,6 +183,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) create.if_not_exists = false; std::ostringstream statement_stream; + statement_stream.exceptions(std::ios::failbit); formatAST(create, statement_stream, false); statement_stream << '\n'; String statement = statement_stream.str(); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index a0a63dfed08..ed791f0d592 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -223,6 +223,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() MutableColumns res_columns = sample_block.cloneEmptyColumns(); std::stringstream ss; + ss.exceptions(std::ios::failbit); if (ast.getKind() == ASTExplainQuery::ParsedAST) { diff --git a/src/Interpreters/InterpreterShowAccessQuery.cpp b/src/Interpreters/InterpreterShowAccessQuery.cpp index c9541b4f5bf..5f28c49c0bc 100644 --- a/src/Interpreters/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/InterpreterShowAccessQuery.cpp @@ -35,6 +35,7 @@ BlockInputStreamPtr InterpreterShowAccessQuery::executeImpl() const /// Build the result column. MutableColumnPtr column = ColumnString::create(); std::stringstream ss; + ss.exceptions(std::ios::failbit); for (const auto & query : queries) { ss.str(""); diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 8d5f27e116d..749a5811e13 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -239,6 +239,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() /// Build the result column. 
MutableColumnPtr column = ColumnString::create(); std::stringstream create_query_ss; + create_query_ss.exceptions(std::ios::failbit); for (const auto & create_query : create_queries) { formatAST(*create_query, create_query_ss, false, true); @@ -248,6 +249,7 @@ BlockInputStreamPtr InterpreterShowCreateAccessEntityQuery::executeImpl() /// Prepare description of the result column. std::stringstream desc_ss; + desc_ss.exceptions(std::ios::failbit); const auto & show_query = query_ptr->as(); formatAST(show_query, desc_ss, false, true); String desc = desc_ss.str(); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index b14baaafbb9..8861914a68a 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -79,6 +79,7 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl() } std::stringstream stream; + stream.exceptions(std::ios::failbit); formatAST(*create_query, stream, false, false); String res = stream.str(); diff --git a/src/Interpreters/InterpreterShowGrantsQuery.cpp b/src/Interpreters/InterpreterShowGrantsQuery.cpp index 45e065dcfd9..7de51b6a7ee 100644 --- a/src/Interpreters/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/InterpreterShowGrantsQuery.cpp @@ -119,6 +119,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() /// Build the result column. MutableColumnPtr column = ColumnString::create(); std::stringstream grant_ss; + grant_ss.exceptions(std::ios::failbit); for (const auto & grant_query : grant_queries) { grant_ss.str(""); @@ -128,6 +129,7 @@ BlockInputStreamPtr InterpreterShowGrantsQuery::executeImpl() /// Prepare description of the result column. 
std::stringstream desc_ss; + desc_ss.exceptions(std::ios::failbit); const auto & show_query = query_ptr->as(); formatAST(show_query, desc_ss, false, true); String desc = desc_ss.str(); diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index cb5db386f5a..8e67cecdd5e 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -33,6 +33,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.databases) { std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT name FROM system.databases"; if (!query.like.empty()) @@ -54,6 +55,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.clusters) { std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT DISTINCT cluster FROM system.clusters"; if (!query.like.empty()) @@ -73,6 +75,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() else if (query.cluster) { std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT * FROM system.clusters"; rewritten_query << " WHERE cluster = " << std::quoted(query.cluster_str, '\''); @@ -87,6 +90,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() DatabaseCatalog::instance().assertDatabaseExists(database); std::stringstream rewritten_query; + rewritten_query.exceptions(std::ios::failbit); rewritten_query << "SELECT name FROM system."; if (query.dictionaries) diff --git a/src/Interpreters/QueryAliasesVisitor.cpp b/src/Interpreters/QueryAliasesVisitor.cpp index 6eae5594810..9de1d04990d 100644 --- a/src/Interpreters/QueryAliasesVisitor.cpp +++ b/src/Interpreters/QueryAliasesVisitor.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes static String wrongAliasMessage(const ASTPtr & ast, const ASTPtr & prev_ast, const String & alias) { std::stringstream message; + 
message.exceptions(std::ios::failbit); message << "Different expressions with the same alias " << backQuoteIfNeed(alias) << ":" << std::endl; formatAST(*ast, message, false, true); message << std::endl << "and" << std::endl; diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 13606e1d54c..907cbaee243 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -343,6 +343,7 @@ void Set::checkColumnsNumber(size_t num_key_columns) const if (data_types.size() != num_key_columns) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Number of columns in section IN doesn't match. " << num_key_columns << " at left, " << data_types.size() << " at right."; throw Exception(message.str(), ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index c8691c25f1b..8d3cb123955 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -553,6 +553,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (!unknown_required_source_columns.empty()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Missing columns:"; for (const auto & name : unknown_required_source_columns) ss << " '" << name << "'"; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 8faccf7bc7b..cdb3d9b7d7b 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -779,6 +779,7 @@ static std::tuple executeQueryImpl( if (!internal && res.in) { std::stringstream log_str; + log_str.exceptions(std::ios::failbit); log_str << "Query pipeline:\n"; res.in->dumpTree(log_str); LOG_DEBUG(&Poco::Logger::get("executeQuery"), log_str.str()); diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 4ab7da014e4..20aa73166fb 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt 
@@ -37,9 +37,6 @@ add_executable (in_join_subqueries_preprocessor in_join_subqueries_preprocessor. target_link_libraries (in_join_subqueries_preprocessor PRIVATE clickhouse_aggregate_functions dbms clickhouse_parsers) add_check(in_join_subqueries_preprocessor) -add_executable (users users.cpp) -target_link_libraries (users PRIVATE clickhouse_aggregate_functions dbms clickhouse_common_config) - if (OS_LINUX) add_executable (internal_iotop internal_iotop.cpp) target_link_libraries (internal_iotop PRIVATE dbms) diff --git a/src/Interpreters/tests/users.cpp b/src/Interpreters/tests/users.cpp deleted file mode 100644 index acd0cfd0519..00000000000 --- a/src/Interpreters/tests/users.cpp +++ /dev/null @@ -1,282 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace -{ - -namespace fs = std::filesystem; - -struct TestEntry -{ - std::string user_name; - std::string database_name; - bool is_allowed; -}; - -using TestEntries = std::vector; - -struct TestDescriptor -{ - const char * config_content; - TestEntries entries; -}; - -using TestSet = std::vector; - -/// Tests description. 
- -TestSet test_set = -{ - { - "" - " " - " " - " " - " defaultdefault" - " " - " default" - " test" - " " - " " - " " - " defaultdefault" - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", true }, - { "default", "stats", false }, - { "web", "default", true }, - { "web", "test", true }, - { "web", "stats", true }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - }, - - { - "" - " " - " " - " " - " defaultdefault" - " " - " default" - " " - " " - " " - " defaultdefault" - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", false }, - { "default", "stats", false }, - { "web", "default", true }, - { "web", "test", true }, - { "web", "stats", true }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - }, - - { - "" - " " - " " - " " - " defaultdefault" - " " - " " - " " - " " - " defaultdefault" - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", true }, - { "default", "stats", true }, - { "web", "default", true }, - { "web", "test", true }, - { "web", "stats", true }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - }, - - { - "" - " " - " " - " " - " defaultdefault" - " " - " default" - " " - " " - " " - " defaultdefault" - " " - " test" - " " - " " - " " - " " - "", - - { - { "default", "default", true }, - { "default", "test", false }, - { "default", "stats", false }, - { "web", "default", false }, - { "web", "test", true }, - { "web", "stats", false }, - { "analytics", "default", false }, - { "analytics", "test", false }, - { "analytics", "stats", false } - } - } -}; - -std::string createTmpPath(const std::string & filename) -{ - char pattern[] = "/tmp/fileXXXXXX"; - char * dir = mkdtemp(pattern); - if (dir == nullptr) - throw std::runtime_error("Could not create directory"); - - 
return std::string(dir) + "/" + filename; -} - -void createFile(const std::string & filename, const char * data) -{ - std::ofstream ofs(filename.c_str()); - if (!ofs.is_open()) - throw std::runtime_error("Could not open file " + filename); - ofs << data; -} - -void runOneTest(const TestDescriptor & test_descriptor) -{ - const auto path_name = createTmpPath("users.xml"); - createFile(path_name, test_descriptor.config_content); - - DB::ConfigurationPtr config; - - try - { - config = DB::ConfigProcessor(path_name).loadConfig().configuration; - } - catch (const Poco::Exception & ex) - { - std::ostringstream os; - os << "Error: " << ex.what() << ": " << ex.displayText(); - throw std::runtime_error(os.str()); - } - - DB::AccessControlManager acl_manager; - - try - { - acl_manager.setUsersConfig(*config); - } - catch (const Poco::Exception & ex) - { - std::ostringstream os; - os << "Error: " << ex.what() << ": " << ex.displayText(); - throw std::runtime_error(os.str()); - } - - for (const auto & entry : test_descriptor.entries) - { - bool res; - - try - { - res = acl_manager.read(entry.user_name)->access.isGranted(DB::AccessType::ALL, entry.database_name); - } - catch (const Poco::Exception &) - { - res = false; - } - - if (res != entry.is_allowed) - { - auto to_string = [](bool access){ return (access ? 
"'granted'" : "'denied'"); }; - std::ostringstream os; - os << "(user=" << entry.user_name << ", database=" << entry.database_name << "): "; - os << "Expected " << to_string(entry.is_allowed) << " but got " << to_string(res); - throw std::runtime_error(os.str()); - } - } - - fs::remove_all(fs::path(path_name).parent_path().string()); -} - -auto runTestSet() -{ - size_t test_num = 1; - size_t failure_count = 0; - - for (const auto & test_descriptor : test_set) - { - try - { - runOneTest(test_descriptor); - std::cout << "Test " << test_num << " passed\n"; - } - catch (const std::runtime_error & ex) - { - std::cerr << "Test " << test_num << " failed with reason: " << ex.what() << "\n"; - ++failure_count; - } - catch (...) - { - std::cerr << "Test " << test_num << " failed with unknown reason\n"; - ++failure_count; - } - - ++test_num; - } - - return std::make_tuple(test_set.size(), failure_count); -} - -} - -int main() -{ - size_t test_count; - size_t failure_count; - - std::tie(test_count, failure_count) = runTestSet(); - - std::cout << (test_count - failure_count) << " test(s) passed out of " << test_count << "\n"; - - return (failure_count == 0) ? 
0 : EXIT_FAILURE; -} diff --git a/src/Parsers/ASTCreateRowPolicyQuery.cpp b/src/Parsers/ASTCreateRowPolicyQuery.cpp index 640b030b6cf..6224b534851 100644 --- a/src/Parsers/ASTCreateRowPolicyQuery.cpp +++ b/src/Parsers/ASTCreateRowPolicyQuery.cpp @@ -63,6 +63,7 @@ namespace { std::vector> conditions_as_strings; std::stringstream temp_sstream; + temp_sstream.exceptions(std::ios::failbit); IAST::FormatSettings temp_settings(temp_sstream, settings); for (const auto & [condition_type, condition] : conditions) { diff --git a/src/Parsers/DumpASTNode.h b/src/Parsers/DumpASTNode.h index 430e70de8da..01447850c74 100644 --- a/src/Parsers/DumpASTNode.h +++ b/src/Parsers/DumpASTNode.h @@ -95,6 +95,7 @@ public: DebugASTLog() : log(nullptr) { + ss.exceptions(std::ios::failbit); if constexpr (_enable) log = &Poco::Logger::get("AST"); } diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 8ee4154541b..d716a796b77 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -90,6 +90,7 @@ size_t IAST::checkDepthImpl(size_t max_depth, size_t level) const std::string IAST::formatForErrorMessage() const { std::stringstream ss; + ss.exceptions(std::ios::failbit); format(FormatSettings(ss, true /* one line */)); return ss.str(); } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index c88c80021d6..cc9e593d7cb 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -243,6 +243,7 @@ template std::string IAST::formatForErrorMessage(const AstArray & array) { std::stringstream ss; + ss.exceptions(std::ios::failbit); for (size_t i = 0; i < array.size(); ++i) { if (i > 0) diff --git a/src/Parsers/formatAST.cpp b/src/Parsers/formatAST.cpp index fca5130cb89..e19dc715d51 100644 --- a/src/Parsers/formatAST.cpp +++ b/src/Parsers/formatAST.cpp @@ -16,6 +16,7 @@ void formatAST(const IAST & ast, std::ostream & s, bool hilite, bool one_line) String serializeAST(const IAST & ast, bool one_line) { std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(ast, ss, false, 
one_line); return ss.str(); } diff --git a/src/Parsers/queryToString.cpp b/src/Parsers/queryToString.cpp index d214468c2a9..44ea721485f 100644 --- a/src/Parsers/queryToString.cpp +++ b/src/Parsers/queryToString.cpp @@ -12,6 +12,7 @@ namespace DB String queryToString(const IAST & query) { std::ostringstream out; + out.exceptions(std::ios::failbit); formatAST(query, out, false, true); return out.str(); } diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp index c2bde5fa8f1..c418759aa21 100644 --- a/src/Parsers/tests/gtest_dictionary_parser.cpp +++ b/src/Parsers/tests/gtest_dictionary_parser.cpp @@ -18,6 +18,7 @@ using namespace DB; static String astToString(IAST * ast) { std::ostringstream oss; + oss.exceptions(std::ios::failbit); dumpAST(*ast, oss); return oss.str(); } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index cf7a020ee0b..8a416ade740 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -161,6 +161,7 @@ static void insertNumber(IColumn & column, WhichDataType type, T value) static std::string nodeToJson(avro::NodePtr root_node) { std::ostringstream ss; + ss.exceptions(std::ios::failbit); root_node->printJson(ss, 0); return ss.str(); } diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp index 628a90beefb..96a458eb49f 100644 --- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp @@ -168,6 +168,11 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind) std::stringstream middle_values_separator; std::stringstream bottom_separator; + top_separator.exceptions(std::ios::failbit); + middle_names_separator.exceptions(std::ios::failbit); + middle_values_separator.exceptions(std::ios::failbit); + 
bottom_separator.exceptions(std::ios::failbit); + top_separator << grid_symbols.bold_left_top_corner; middle_names_separator << grid_symbols.bold_left_separator; middle_values_separator << grid_symbols.left_separator; diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp index b81ba84c732..9320b159836 100644 --- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp @@ -134,6 +134,7 @@ void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths) ascii_grid_symbols; /// Create delimiters std::stringstream bottom_separator; + bottom_separator.exceptions(std::ios::failbit); bottom_separator << grid_symbols.left_bottom_corner; for (size_t i = 0; i < max_widths.size(); ++i) diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 697ac9496b5..e7a7200ac34 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -42,6 +42,7 @@ void CollapsingSortedAlgorithm::reportIncorrectData() return; std::stringstream s; + s.exceptions(std::ios::failbit); auto & sort_columns = *last_row.sort_columns; for (size_t i = 0, size = sort_columns.size(); i < size; ++i) { diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 1aa5c10afd7..bf7a3b8ab52 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -37,6 +37,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request bool ok = true; std::stringstream message; + message.exceptions(std::ios::failbit); auto databases = DatabaseCatalog::instance().getDatabases(); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1b0cbc69b29..4dceb0aa905 100644 --- 
a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -437,6 +437,7 @@ bool TCPHandler::readDataNext(const size_t & poll_interval, const int & receive_ if (elapsed > receive_timeout) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Timeout exceeded while receiving data from client."; ss << " Waited for " << static_cast(elapsed) << " seconds,"; ss << " timeout is " << receive_timeout << " seconds."; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index f08cdf76cbf..c9f4ffe8b6a 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -148,6 +148,7 @@ void DistributedBlockOutputStream::writeAsync(const Block & block) std::string DistributedBlockOutputStream::getCurrentStateDescription() { std::stringstream buffer; + buffer.exceptions(std::ios::failbit); const auto & addresses = cluster->getShardsAddresses(); buffer << "Insertion status:\n"; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index a63a4309775..22b4026340c 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -247,6 +247,7 @@ Names StorageKafka::parseTopics(String topic_list) String StorageKafka::getDefaultClientId(const StorageID & table_id_) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << VERSION_NAME << "-" << getFQDNOrHostName() << "-" << table_id_.database_name << "-" << table_id_.table_name; return ss.str(); } @@ -400,6 +401,7 @@ ConsumerBufferPtr StorageKafka::createReadBuffer(const size_t consumer_number) if (num_consumers > 1) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << client_id << "-" << consumer_number; conf.set("client.id", ss.str()); } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 7cb872f174a..3ccb2f8e2a6 100644 --- 
a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -33,6 +33,7 @@ namespace ErrorCodes String Range::toString() const { std::stringstream str; + str.exceptions(std::ios::failbit); if (!left_bounded) str << "(-inf, "; @@ -1443,6 +1444,7 @@ String KeyCondition::RPNElement::toString() const }; std::ostringstream ss; + ss.exceptions(std::ios::failbit); switch (function) { case FUNCTION_AND: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 1a7062766b0..7884dc7beaa 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3206,6 +3206,7 @@ void MergeTreeData::Transaction::rollbackPartsToTemporaryState() if (!isEmpty()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << " Rollbacking parts state to temporary and removing from working set:"; for (const auto & part : precommitted_parts) ss << " " << part->relative_path; @@ -3224,6 +3225,7 @@ void MergeTreeData::Transaction::rollback() if (!isEmpty()) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << " Removing parts:"; for (const auto & part : precommitted_parts) ss << " " << part->relative_path; @@ -3759,6 +3761,7 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S || settings.min_rows_for_compact_part != 0 || settings.min_bytes_for_compact_part != 0)) { std::ostringstream message; + message.exceptions(std::ios::failbit); message << "Table can't create parts with adaptive granularity, but settings" << " min_rows_for_wide_part = " << settings.min_rows_for_wide_part << ", min_bytes_for_wide_part = " << settings.min_bytes_for_wide_part diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index c75970f6cc1..58bdbcdcdcd 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -443,6 +443,7 
@@ void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChe if (num_compressed_files != rhs.num_compressed_files || num_uncompressed_files != rhs.num_uncompressed_files) { std::stringstream error_msg; + error_msg.exceptions(std::ios::failbit); error_msg << "Different number of files: " << rhs.num_compressed_files << " compressed (expected " << num_compressed_files << ")" << " and " << rhs.num_uncompressed_files << " uncompressed ones (expected " << num_uncompressed_files << ")"; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 7eb21503fe6..e34096b1309 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -228,6 +228,7 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Primary key ("; for (size_t i = 0, size = primary_key_columns.size(); i < size; ++i) exception_message << (i == 0 ? "" : ", ") << primary_key_columns[i]; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 4aa186473a9..597ff5e8fee 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -942,6 +942,7 @@ size_t ReplicatedMergeTreeQueue::getConflictsCountForRange( if (out_description) { std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Can't execute command for range " << range.getPartName() << " (entry " << entry.znode_name << "). 
"; ss << "There are " << conflicts.size() << " currently executing entries blocking it: "; for (const auto & conflict : conflicts) @@ -1693,6 +1694,7 @@ std::vector ReplicatedMergeTreeQueue::getMutationsStatu for (const MutationCommand & command : entry.commands) { std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*command.ast, ss, false, true); result.push_back(MergeTreeMutationStatus { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 378b1b284a3..48f05b50675 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -19,6 +19,7 @@ static String formattedAST(const ASTPtr & ast) if (!ast) return ""; std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*ast, ss, false, true); return ss.str(); } diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index ba998dd5951..53c9b50cb9d 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -127,6 +127,7 @@ std::shared_ptr MutationCommands::ast() const void MutationCommands::writeText(WriteBuffer & out) const { std::stringstream commands_ss; + commands_ss.exceptions(std::ios::failbit); formatAST(*ast(), commands_ss, /* hilite = */ false, /* one_line = */ true); out << escape << commands_ss.str(); } diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 9735c4d7fd3..55cecf6f202 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -187,6 +187,7 @@ AMQP::ExchangeType StorageRabbitMQ::defineExchangeType(String exchange_type_) String StorageRabbitMQ::getTableBasedName(String name, const StorageID & table_id) { std::stringstream ss; + ss.exceptions(std::ios::failbit); if (name.empty()) ss << table_id.database_name << "_" << table_id.table_name; diff --git 
a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index e859baa702e..a81c80cf466 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -82,6 +82,7 @@ NamesAndTypesList StorageDictionary::getNamesAndTypes(const DictionaryStructure String StorageDictionary::generateNamesAndTypesDescription(const NamesAndTypesList & list) { std::stringstream ss; + ss.exceptions(std::ios::failbit); bool first = true; for (const auto & name_and_type : list) { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 4409be73e52..a6f8cde37c9 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -176,6 +176,7 @@ UInt64 getMaximumFileNumber(const std::string & dir_path) std::string makeFormattedListOfShards(const ClusterPtr & cluster) { std::ostringstream os; + os.exceptions(std::ios::failbit); bool head = true; os << "["; @@ -749,6 +750,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons if (force) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); if (!has_sharding_key) exception_message << "No sharding key"; else if (!sharding_key_is_usable) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index f410fa34f59..5cc435f91fa 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -428,6 +428,7 @@ namespace String listOfColumns(const NamesAndTypesList & available_columns) { std::stringstream ss; + ss.exceptions(std::ios::failbit); for (auto it = available_columns.begin(); it != available_columns.end(); ++it) { if (it != available_columns.begin()) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 11e8859e76c..b8b40356c52 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -555,6 +555,7 @@ std::vector 
StorageMergeTree::getMutationsStatus() cons for (const MutationCommand & command : entry.commands) { std::stringstream ss; + ss.exceptions(std::ios::failbit); formatAST(*command.ast, ss, false, true); result.push_back(MergeTreeMutationStatus { @@ -1030,6 +1031,7 @@ bool StorageMergeTree::optimize( if (!merge(true, partition_id, true, deduplicate, &disable_reason)) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Cannot OPTIMIZE table"; if (!disable_reason.empty()) message << ": " << disable_reason; @@ -1052,6 +1054,7 @@ bool StorageMergeTree::optimize( if (!merge(true, partition_id, final, deduplicate, &disable_reason)) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Cannot OPTIMIZE table"; if (!disable_reason.empty()) message << ": " << disable_reason; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7412031c595..9065bfd2d1d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -460,6 +460,7 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( if (!inactive_replicas.empty()) { std::stringstream exception_message; + exception_message.exceptions(std::ios::failbit); exception_message << "Mutation is not finished because"; if (!inactive_replicas.empty()) @@ -1017,6 +1018,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) parts_to_fetch_blocks += get_blocks_count_in_data_part(name); std::stringstream sanity_report; + sanity_report.exceptions(std::ios::failbit); sanity_report << "There are " << unexpected_parts.size() << " unexpected parts with " << unexpected_parts_rows << " rows (" << unexpected_parts_nonnew << " of them is not just-written with " << unexpected_parts_rows << " rows), " @@ -1041,6 +1043,7 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) if (insane && !skip_sanity_checks) { std::stringstream why; + 
why.exceptions(std::ios::failbit); why << "The local set of parts of table " << getStorageID().getNameForLogs() << " doesn't look like the set of parts " << "in ZooKeeper: " << formatReadableQuantity(unexpected_parts_rows) << " rows of " << formatReadableQuantity(total_rows_on_filesystem) @@ -1342,6 +1345,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) // Log source part names just in case { std::stringstream source_parts_msg; + source_parts_msg.exceptions(std::ios::failbit); for (auto i : ext::range(0, entry.source_parts.size())) source_parts_msg << (i != 0 ? ", " : "") << entry.source_parts[i]; @@ -3824,6 +3828,7 @@ bool StorageReplicatedMergeTree::optimize( if (!selected) { std::stringstream message; + message.exceptions(std::ios::failbit); message << "Cannot select parts for optimization"; if (!disable_reason.empty()) message << ": " << disable_reason; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index e4228c0d4ec..67a81045f2e 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -255,6 +255,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & if (!outcome.IsSuccess()) { std::ostringstream message; + message.exceptions(std::ios::failbit); message << "Could not list objects in bucket " << quoteString(request.GetBucket()) << " with prefix " << quoteString(request.GetPrefix()); diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index 9f0b81263e3..70c67683b25 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -97,6 +97,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & auth_params_json.set("server", authentication.getServerName()); std::ostringstream oss; + oss.exceptions(std::ios::failbit); Poco::JSON::Stringifier::stringify(auth_params_json, oss); const auto str = oss.str(); diff --git 
a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 8de14b53471..e29cfabf328 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -128,6 +128,7 @@ std::string readData(DB::StoragePtr & table, const DB::Context & context) } std::ostringstream ss; + ss.exceptions(std::ios::failbit); WriteBufferFromOStream out_buf(ss); BlockOutputStreamPtr output = FormatFactory::instance().getOutput("Values", out_buf, sample, context); diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 2556cd10648..3148ab1112a 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -221,6 +221,7 @@ String transformQueryForExternalDatabase( dropAliases(select_ptr); std::stringstream out; + out.exceptions(std::ios::failbit); IAST::FormatSettings settings(out, true); settings.identifier_quoting_style = identifier_quoting_style; settings.always_quote_identifiers = identifier_quoting_style != IdentifierQuotingStyle::None; diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 2e34e82ce36..22a07a4d284 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -243,6 +243,7 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ is_cluster_function = (name == "cluster" || name == "clusterAllReplicas"); std::stringstream ss; + ss.exceptions(std::ios::failbit); ss << "Table function '" << name + "' requires from 2 to " << (is_cluster_function ? 3 : 5) << " parameters" << ": , , " << (is_cluster_function ? 
"" : ", [username, [password]]."); From d564e4efade0d9e306cb56b50a97e1bac0682368 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 03:29:49 +0300 Subject: [PATCH 64/92] Fix build --- src/Functions/abtesting.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/abtesting.cpp b/src/Functions/abtesting.cpp index 25c0abbdded..051b1f6f0ef 100644 --- a/src/Functions/abtesting.cpp +++ b/src/Functions/abtesting.cpp @@ -140,7 +140,7 @@ String convertToJson(const PODArray & variant_names, const Variants & va { FormatSettings settings; - WriteBufferOwnString buf; + WriteBufferFromOwnString buf; writeCString("{\"data\":[", buf); for (size_t i = 0; i < variants.size(); ++i) From 381ed50befcce99ab0f82dd93aec87e293dde298 Mon Sep 17 00:00:00 2001 From: MyroTk Date: Sat, 7 Nov 2020 01:39:22 +0100 Subject: [PATCH 65/92] Enabling existing testflows tests. --- tests/testflows/rbac/tests/privileges/feature.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/testflows/rbac/tests/privileges/feature.py b/tests/testflows/rbac/tests/privileges/feature.py index 81af29a70e1..bc63824d322 100755 --- a/tests/testflows/rbac/tests/privileges/feature.py +++ b/tests/testflows/rbac/tests/privileges/feature.py @@ -7,7 +7,7 @@ from rbac.helper.common import * def feature(self): tasks = [] - pool = Pool(10) + pool = Pool(16) try: try: @@ -21,6 +21,12 @@ def feature(self): run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_constraint", "feature"), flags=TE), {}) run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_ttl", "feature"), flags=TE), {}) run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_settings", "feature"), flags=TE), {}) + run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_update", "feature"), flags=TE), {}) + run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_delete", 
"feature"), flags=TE), {}) + run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_freeze", "feature"), flags=TE), {}) + run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_fetch", "feature"), flags=TE), {}) + run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.alter.alter_move", "feature"), flags=TE), {}) + run_scenario(pool, tasks, Feature(test=load("rbac.tests.privileges.grant_option", "feature"), flags=TE), {}) finally: join(tasks) finally: From d78d07f825c9001b7877b119e1832478b21da512 Mon Sep 17 00:00:00 2001 From: MyroTk Date: Sat, 7 Nov 2020 01:48:29 +0100 Subject: [PATCH 66/92] xfails for ALTER MOVE --- tests/testflows/rbac/regression.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/testflows/rbac/regression.py b/tests/testflows/rbac/regression.py index c0307664061..a53de0178eb 100755 --- a/tests/testflows/rbac/regression.py +++ b/tests/testflows/rbac/regression.py @@ -19,6 +19,7 @@ issue_14674 = "https://github.com/ClickHouse/ClickHouse/issues/14674" issue_14810 = "https://github.com/ClickHouse/ClickHouse/issues/14810" issue_15165 = "https://github.com/ClickHouse/ClickHouse/issues/15165" issue_15980 = "https://github.com/ClickHouse/ClickHouse/issues/15980" +issue_16403 = "https://github.com/ClickHouse/ClickHouse/issues/16403" xfails = { "syntax/show create quota/I show create quota current": @@ -89,6 +90,12 @@ xfails = { [(Fail, ".inner table is not created as expected")], "views/materialized view/select from source table privilege granted directly or via role/select from implicit target table, privilege granted through a role": [(Fail, ".inner table is not created as expected")], + "privileges/alter move/:/:/:/:/move partition to implicit target table of a materialized view": + [(Fail, ".inner table is not created as expected")], + "privileges/alter move/:/:/:/:/user without ALTER MOVE PARTITION privilege/": + [(Fail, issue_16403)], + "privileges/alter move/:/:/:/:/user 
with revoked ALTER MOVE PARTITION privilege/": + [(Fail, issue_16403)], } xflags = { From 85d1f37dde05a24367a769e84839c52b14bee7d6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Nov 2020 13:27:56 +0300 Subject: [PATCH 67/92] Update sumwithoverflow.md --- .../aggregate-functions/reference/sumwithoverflow.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md index fa603b4b155..1b39e9d0eb1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md @@ -4,6 +4,6 @@ toc_priority: 140 # sumWithOverflow {#sumwithoverflowx} -Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, the function returns an error. +Computes the sum of the numbers, using the same data type for the result as for the input parameters. If the sum exceeds the maximum value for this data type, it is calculated with overflow. Only works for numbers. From ca7ef4fb90cc61e21ab20e40b60e75031a1e5b81 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Nov 2020 13:50:23 +0300 Subject: [PATCH 68/92] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index d08e7833b33..29d4dc9ba48 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -68,6 +68,7 @@ toc_title: Adopters | Nuna Inc. 
| Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | | OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | | Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | | Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | | PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | | Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | From c2e6ddddbd7cb35122576a25513e11827c3cdf20 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Nov 2020 13:54:36 +0300 Subject: [PATCH 69/92] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 29d4dc9ba48..89b64bfecb8 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -45,6 +45,7 @@ toc_title: Adopters | FunCorp | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | | Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | | Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | From 031dc4166850588da2962ba50b98a30e77657ce4 Mon Sep 17 00:00:00 2001 From: ArtCorp <35217823+ArtCorp@users.noreply.github.com> Date: Sat, 7 Nov 2020 16:14:52 +0400 Subject: [PATCH 70/92] Inconvenient navigation Hi, I think more links are needed. --- docs/ru/engines/table-engines/special/buffer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 986fe9adbb9..6cd4c58b805 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -64,6 +64,6 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться. 
-Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел «Производительность»). +Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](https://clickhouse.tech/docs/ru/introduction/performance/). [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/buffer/) From c3973123bdd930692e4d40c81292edc028d191e9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 19:03:23 +0300 Subject: [PATCH 71/92] Fix error --- src/Common/HTMLForm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/HTMLForm.h b/src/Common/HTMLForm.h index f9b5cc0c520..2490d613160 100644 --- a/src/Common/HTMLForm.h +++ b/src/Common/HTMLForm.h @@ -17,14 +17,12 @@ struct HTMLForm : public Poco::Net::HTMLForm { Poco::URI uri(request.getURI()); std::istringstream istr(uri.getRawQuery()); - istr.exceptions(std::ios::failbit); readUrl(istr); } HTMLForm(const Poco::URI & uri) { std::istringstream istr(uri.getRawQuery()); - istr.exceptions(std::ios::failbit); readUrl(istr); } From 8b8383d3755e9ab30e10545ad6cd3a7cd5fdccf4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 19:08:19 +0300 Subject: [PATCH 72/92] Fix error --- src/Common/ThreadProfileEvents.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Common/ThreadProfileEvents.cpp b/src/Common/ThreadProfileEvents.cpp index cb519c9b928..7b94ca0f2b2 100644 --- a/src/Common/ThreadProfileEvents.cpp +++ b/src/Common/ThreadProfileEvents.cpp @@ -398,8 +398,7 @@ bool 
PerfEventsCounters::processThreadLocalChanges(const std::string & needed_ev return true; } -// Parse comma-separated list of event names. Empty means all available -// events. +// Parse comma-separated list of event names. Empty means all available events. std::vector PerfEventsCounters::eventIndicesFromString(const std::string & events_list) { std::vector result; @@ -415,12 +414,10 @@ std::vector PerfEventsCounters::eventIndicesFromString(const std::string } std::istringstream iss(events_list); - iss.exceptions(std::ios::failbit); std::string event_name; while (std::getline(iss, event_name, ',')) { - // Allow spaces at the beginning of the token, so that you can write - // 'a, b'. + // Allow spaces at the beginning of the token, so that you can write 'a, b'. event_name.erase(0, event_name.find_first_not_of(' ')); auto entry = event_name_to_index.find(event_name); From 80fb30c06def7ad4e9d4a1c8d4e005a97ff17456 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 19:09:15 +0300 Subject: [PATCH 73/92] Fix error --- src/Common/parseGlobs.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/parseGlobs.cpp b/src/Common/parseGlobs.cpp index 19a9e9d50b6..f04ad1ea8a0 100644 --- a/src/Common/parseGlobs.cpp +++ b/src/Common/parseGlobs.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -49,7 +48,6 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob std::istringstream iss_range(buffer); iss_range.exceptions(std::ios::failbit); iss_range >> range_begin >> point >> point >> range_end; - assert(!iss_range.fail()); bool leading_zeros = buffer[0] == '0'; size_t num_len = std::to_string(range_end).size(); if (leading_zeros) From 5e5779adfd2724b09060737e4126a80775eeb71a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 19:11:20 +0300 Subject: [PATCH 74/92] Fix error --- src/DataTypes/tests/gtest_data_type_get_common_type.cpp | 1 - 1 file changed, 1 deletion(-) diff --git 
a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp index 5256d49ba6d..8212555e8bc 100644 --- a/src/DataTypes/tests/gtest_data_type_get_common_type.cpp +++ b/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -27,7 +27,6 @@ static auto typeFromString(const std::string & str) static auto typesFromString(const std::string & str) { std::istringstream data_types_stream(str); - data_types_stream.exceptions(std::ios::failbit); DataTypes data_types; std::string data_type; while (data_types_stream >> data_type) From 941ee1a2141f96d2716a31e54e213378efacc2d3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 22:02:22 +0300 Subject: [PATCH 75/92] Fix error --- src/Common/tests/gtest_getMultipleValuesFromConfig.cpp | 2 -- src/Common/tests/gtest_sensitive_data_masker.cpp | 7 ------- 2 files changed, 9 deletions(-) diff --git a/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp b/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp index 743091e7276..4756043acbf 100644 --- a/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp +++ b/src/Common/tests/gtest_getMultipleValuesFromConfig.cpp @@ -19,8 +19,6 @@ TEST(Common, getMultipleValuesFromConfig) )END"); - xml_isteam.exceptions(std::ios::failbit); - Poco::AutoPtr config = new Poco::Util::XMLConfiguration(xml_isteam); std::vector answer = getMultipleValuesFromConfig(*config, "first_level", "second_level"); std::vector right_answer = {"0", "1", "2", "3"}; diff --git a/src/Common/tests/gtest_sensitive_data_masker.cpp b/src/Common/tests/gtest_sensitive_data_masker.cpp index 43d7b9499a6..67ad5be2f52 100644 --- a/src/Common/tests/gtest_sensitive_data_masker.cpp +++ b/src/Common/tests/gtest_sensitive_data_masker.cpp @@ -135,8 +135,6 @@ TEST(Common, SensitiveDataMasker) )END"); - xml_isteam.exceptions(std::ios::failbit); - Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam); DB::SensitiveDataMasker 
masker_xml_based(*xml_config, "query_masking_rules"); std::string top_secret = "The e-mail of IVAN PETROV is kotik1902@sdsdf.test, and the password is qwerty123"; @@ -168,7 +166,6 @@ TEST(Common, SensitiveDataMasker) )END"); - xml_isteam_bad.exceptions(std::ios::failbit); Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); @@ -191,8 +188,6 @@ TEST(Common, SensitiveDataMasker) )END"); - xml_isteam_bad.exceptions(std::ios::failbit); - Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); @@ -215,8 +210,6 @@ TEST(Common, SensitiveDataMasker) )END"); - xml_isteam_bad.exceptions(std::ios::failbit); - Poco::AutoPtr xml_config = new Poco::Util::XMLConfiguration(xml_isteam_bad); DB::SensitiveDataMasker masker_xml_based_exception_check(*xml_config, "query_masking_rules"); From 5a403fcacff5b955980b301a2a7db5a153c80e53 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 22:11:27 +0300 Subject: [PATCH 76/92] Remove old test --- ...5_system_distribution_queue_mask.reference | 4 --- .../01555_system_distribution_queue_mask.sql | 36 ------------------- 2 files changed, 40 deletions(-) delete mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.reference delete mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.sql diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference deleted file mode 100644 index bd0eac10816..00000000000 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference +++ /dev/null @@ -1,4 +0,0 @@ -masked -3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000" -no masking -1,"default@localhost:9000" diff --git 
a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql deleted file mode 100644 index 0143b8e46ed..00000000000 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ /dev/null @@ -1,36 +0,0 @@ --- force data path with the user/pass in it -set use_compact_format_in_distributed_parts_names=0; --- use async send even for localhost -set prefer_localhost_replica=0; - -drop table if exists dist_01555; -drop table if exists data_01555; -create table data_01555 (key Int) Engine=Null(); - --- --- masked --- -SELECT 'masked'; -create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key); - -insert into dist_01555 values (1)(2); --- since test_cluster_with_incorrect_pw contains incorrect password ignore error -system flush distributed dist_01555; -- { serverError 516; } -select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; - -drop table dist_01555; - --- --- no masking --- -SELECT 'no masking'; -create table dist_01555 (key Int) Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key); - -insert into dist_01555 values (1)(2); --- since test_cluster_with_incorrect_pw contains incorrect password ignore error -system flush distributed dist_01555; -select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; - --- cleanup -drop table dist_01555; -drop table data_01555; From bfb330452c608a7c0a3b42d5c818a8999e69d937 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Nov 2020 22:11:27 +0300 Subject: [PATCH 77/92] Remove old test --- ...5_system_distribution_queue_mask.reference | 4 --- 
.../01555_system_distribution_queue_mask.sql | 36 ------------------- 2 files changed, 40 deletions(-) delete mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.reference delete mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.sql diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference deleted file mode 100644 index bd0eac10816..00000000000 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference +++ /dev/null @@ -1,4 +0,0 @@ -masked -3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000" -no masking -1,"default@localhost:9000" diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql deleted file mode 100644 index 0143b8e46ed..00000000000 --- a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql +++ /dev/null @@ -1,36 +0,0 @@ --- force data path with the user/pass in it -set use_compact_format_in_distributed_parts_names=0; --- use async send even for localhost -set prefer_localhost_replica=0; - -drop table if exists dist_01555; -drop table if exists data_01555; -create table data_01555 (key Int) Engine=Null(); - --- --- masked --- -SELECT 'masked'; -create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key); - -insert into dist_01555 values (1)(2); --- since test_cluster_with_incorrect_pw contains incorrect password ignore error -system flush distributed dist_01555; -- { serverError 516; } -select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; - -drop table dist_01555; - --- --- no masking --- -SELECT 'no masking'; -create table dist_01555 (key Int) 
Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key); - -insert into dist_01555 values (1)(2); --- since test_cluster_with_incorrect_pw contains incorrect password ignore error -system flush distributed dist_01555; -select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; - --- cleanup -drop table dist_01555; -drop table data_01555; From c69c1a299a2b2ecec8da76dfef0166f01968062a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 7 Nov 2020 22:39:35 +0300 Subject: [PATCH 78/92] Update buffer.md --- docs/ru/engines/table-engines/special/buffer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 6cd4c58b805..75ce12f50fa 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -64,6 +64,6 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 Таблицы типа Buffer используются в тех случаях, когда от большого количества серверов поступает слишком много INSERT-ов в единицу времени, и нет возможности заранее самостоятельно буферизовать данные перед вставкой, в результате чего, INSERT-ы не успевают выполняться. -Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](https://clickhouse.tech/docs/ru/introduction/performance/). 
+Заметим, что даже для таблиц типа Buffer не имеет смысла вставлять данные по одной строке, так как таким образом будет достигнута скорость всего лишь в несколько тысяч строк в секунду, тогда как при вставке более крупными блоками, достижимо более миллиона строк в секунду (смотрите раздел [«Производительность»](../../../introduction/performance/). [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/buffer/) From 1ea3afadbcf9f289ed54f635b45685faa953e3ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Nov 2020 00:28:39 +0300 Subject: [PATCH 79/92] Merge with master --- .../ClusterProxy/executeQuery.cpp | 2 +- src/Storages/StorageDistributed.cpp | 26 ------------------- src/Storages/StorageProxy.h | 6 +++-- src/Storages/StorageTableFunction.h | 2 +- 4 files changed, 6 insertions(+), 30 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 2bb9d820bd5..24d819ad2d7 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -92,7 +92,7 @@ Pipe executeQuery( const std::string query = queryToString(query_ast); - Context new_context = updateSettingsForCluster(*cluster, context, settings, log); + Context new_context = updateSettingsForCluster(*query_info.cluster, context, settings, log); ThrottlerPtr user_level_throttler; if (auto * process_list_element = context.getProcessListElement()) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 947c5b27d98..c577cc83c7e 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -419,32 +419,6 @@ StorageDistributed::StorageDistributed( remote_table_function_ptr = std::move(remote_table_function_ptr_); } -StoragePtr StorageDistributed::createWithOwnCluster( - const StorageID & table_id_, - const ColumnsDescription & columns_, - const String & remote_database_, /// database on remote servers. 
- const String & remote_table_, /// The name of the table on the remote servers. - ClusterPtr owned_cluster_, - const Context & context_) -{ - auto res = create(table_id_, columns_, ConstraintsDescription{}, remote_database_, remote_table_, String{}, context_, ASTPtr(), String(), String(), false); - res->owned_cluster = std::move(owned_cluster_); - return res; -} - - -StoragePtr StorageDistributed::createWithOwnCluster( - const StorageID & table_id_, - const ColumnsDescription & columns_, - ASTPtr & remote_table_function_ptr_, - ClusterPtr & owned_cluster_, - const Context & context_) -{ - auto res = create(table_id_, columns_, ConstraintsDescription{}, remote_table_function_ptr_, String{}, context_, ASTPtr(), String(), String(), false); - res->owned_cluster = owned_cluster_; - return res; -} - QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { const auto & settings = context.getSettingsRef(); diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index e2a6438ecfe..f50235b7e7c 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -31,7 +31,9 @@ public: ColumnSizeByName getColumnSizes() const override { return getNested()->getColumnSizes(); } NamesAndTypesList getVirtuals() const override { return getNested()->getVirtuals(); } - QueryProcessingStage::Enum getQueryProcessingStage(const Context & context, QueryProcessingStage::Enum to_stage, const ASTPtr & ast) const override + + QueryProcessingStage::Enum getQueryProcessingStage( + const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & ast) const override { return getNested()->getQueryProcessingStage(context, to_stage, ast); } @@ -50,7 +52,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, 
QueryProcessingStage::Enum processed_stage, size_t max_block_size, diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 9b698cb3954..e0488ecf06e 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -72,7 +72,7 @@ public: Pipe read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, - const SelectQueryInfo & query_info, + SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, From ba4ae00121e1521ea8297447d94d6ca34b7064d4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Nov 2020 00:30:40 +0300 Subject: [PATCH 80/92] Whitespace --- src/Storages/StorageDistributed.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index c577cc83c7e..ecdc1cdd724 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -419,7 +419,8 @@ StorageDistributed::StorageDistributed( remote_table_function_ptr = std::move(remote_table_function_ptr_); } -QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const +QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( + const Context & context, QueryProcessingStage::Enum to_stage, SelectQueryInfo & query_info) const { const auto & settings = context.getSettingsRef(); auto metadata_snapshot = getInMemoryMetadataPtr(); From 6eda689aa34fe523de08421ed4e9489673691fee Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 8 Nov 2020 00:55:34 +0300 Subject: [PATCH 81/92] Update adopters.md --- docs/en/introduction/adopters.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 89b64bfecb8..b367a97771a 100644 --- a/docs/en/introduction/adopters.md 
+++ b/docs/en/introduction/adopters.md @@ -36,6 +36,7 @@ toc_title: Adopters | Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | | Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | | Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | | Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | | Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | | eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | From 4e03daac76b76dd965391cef681d1f9c500b132a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 8 Nov 2020 01:19:50 +0300 Subject: [PATCH 82/92] Update CMakeLists.txt --- programs/odbc-bridge/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 8e7ccfedc70..11864354619 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -24,6 +24,7 @@ add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) target_link_libraries(clickhouse-odbc-bridge PRIVATE daemon dbms + clickhouse_parsers Poco::Data Poco::Data::ODBC ) From 7c62dbb6dce74378a3b17b5ed61c279bc9d890a9 Mon Sep 17 
00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Nov 2020 01:23:19 +0300 Subject: [PATCH 83/92] Fix error --- src/Parsers/ParserCreateQuery.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index e09bab30ff3..fbdc308d5bc 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -165,12 +165,12 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E ASTPtr codec_expression; ASTPtr ttl_expression; - if (require_type || - ( !s_default.checkWithoutMoving(pos, expected) + if (!s_default.checkWithoutMoving(pos, expected) && !s_materialized.checkWithoutMoving(pos, expected) && !s_alias.checkWithoutMoving(pos, expected) - && !s_comment.checkWithoutMoving(pos, expected) - && !s_codec.checkWithoutMoving(pos, expected))) + && (require_type + || (!s_comment.checkWithoutMoving(pos, expected) + && !s_codec.checkWithoutMoving(pos, expected)))) { if (!type_parser.parse(pos, type, expected)) return false; From 04db0834bfe89645160b0a152b0f210cec3cd68e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 8 Nov 2020 02:44:35 +0300 Subject: [PATCH 84/92] Apply use_compact_format_in_distributed_parts_names for each INSERT (with internal_replication) Before this patch use_compact_format_in_distributed_parts_names was applied only from default profile (at server start) for internal_replication=1, and was ignored on INSERT. 
--- src/Interpreters/Cluster.cpp | 59 +++++++++++-------- src/Interpreters/Cluster.h | 28 +++++++-- .../DistributedBlockOutputStream.cpp | 9 ++- 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 9c2766ae7d6..61ad4258b90 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -46,6 +46,14 @@ inline bool isLocalImpl(const Cluster::Address & address, const Poco::Net::Socke return address.default_database.empty() && isLocalAddress(resolved_address, clickhouse_port); } +void concatInsertPath(std::string & insert_path, const std::string & dir_name) +{ + if (insert_path.empty()) + insert_path = dir_name; + else + insert_path += "," + dir_name; +} + } /// Implementation of Cluster::Address class @@ -358,9 +366,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, bool internal_replication = config.getBool(partial_prefix + ".internal_replication", false); - /// In case of internal_replication we will be appending names to dir_name_for_internal_replication - std::string dir_name_for_internal_replication; - std::string dir_name_for_internal_replication_with_local; + ShardInfoInsertPathForInternalReplication insert_paths; for (const auto & replica_key : replica_keys) { @@ -379,18 +385,20 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, if (internal_replication) { - auto dir_name = replica_addresses.back().toFullString(settings.use_compact_format_in_distributed_parts_names); - if (!replica_addresses.back().is_local) + /// use_compact_format=0 { - if (dir_name_for_internal_replication.empty()) - dir_name_for_internal_replication = dir_name; - else - dir_name_for_internal_replication += "," + dir_name; + auto dir_name = replica_addresses.back().toFullString(0 /* use_compact_format */); + if (!replica_addresses.back().is_local) + concatInsertPath(insert_paths.prefer_localhost_replica, dir_name); + 
concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name); + } + /// use_compact_format=1 + { + auto dir_name = replica_addresses.back().toFullString(1 /* use_compact_format */); + if (!replica_addresses.back().is_local) + concatInsertPath(insert_paths.prefer_localhost_replica_compact, dir_name); + concatInsertPath(insert_paths.no_prefer_localhost_replica_compact, dir_name); } - if (dir_name_for_internal_replication_with_local.empty()) - dir_name_for_internal_replication_with_local = dir_name; - else - dir_name_for_internal_replication_with_local += "," + dir_name; } } else @@ -425,8 +433,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); shards_info.push_back({ - std::move(dir_name_for_internal_replication), - std::move(dir_name_for_internal_replication_with_local), + std::move(insert_paths), current_shard_num, weight, std::move(shard_local_addresses), @@ -485,8 +492,7 @@ Cluster::Cluster(const Settings & settings, const std::vector; using AddressesWithFailover = std::vector; + /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication + /// + /// Contains different path for permutations of: + /// - prefer_localhost_replica + /// Note: with prefer_localhost_replica==0 it will contain local nodes. + /// - use_compact_format_in_distributed_parts_names + /// See toFullString() + /// + /// This is cached to avoid looping by replicas in insertPathForInternalReplication().
+ struct ShardInfoInsertPathForInternalReplication + { + /// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=0 + std::string prefer_localhost_replica; + /// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=0 + std::string no_prefer_localhost_replica; + /// prefer_localhost_replica == 1 && use_compact_format_in_distributed_parts_names=1 + std::string prefer_localhost_replica_compact; + /// prefer_localhost_replica == 0 && use_compact_format_in_distributed_parts_names=1 + std::string no_prefer_localhost_replica_compact; + }; + struct ShardInfo { public: @@ -141,13 +162,10 @@ public: size_t getLocalNodeCount() const { return local_addresses.size(); } bool hasInternalReplication() const { return has_internal_replication; } /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication - const std::string & pathForInsert(bool prefer_localhost_replica) const; + const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; public: - /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication && prefer_localhost_replica - std::string dir_name_for_internal_replication; - /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication && !prefer_localhost_replica - std::string dir_name_for_internal_replication_with_local; + ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication; /// Number of the shard, the indexation begins with 1 UInt32 shard_num = 0; UInt32 weight = 1; diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index f08cdf76cbf..d524d0354ae 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -523,7 +523,14 @@ void DistributedBlockOutputStream::writeAsyncImpl(const 
Block & block, const siz /// Prefer insert into current instance directly writeToLocal(block, shard_info.getLocalNodeCount()); else - writeToShard(block, {shard_info.pathForInsert(settings.prefer_localhost_replica)}); + { + const auto & path = shard_info.insertPathForInternalReplication( + settings.prefer_localhost_replica, + settings.use_compact_format_in_distributed_parts_names); + if (path.empty()) + throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); + writeToShard(block, {path}); + } } else { From 34d882b7d378c534b8c5a75320db22856eb03e09 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 8 Nov 2020 02:21:29 +0300 Subject: [PATCH 85/92] Get back 01555_system_distribution_queue_mask This reverts commit bfb330452c608a7c0a3b42d5c818a8999e69d937. Since now use_compact_format_in_distributed_parts_names works on fly for internal_replication=true. --- ...5_system_distribution_queue_mask.reference | 4 +++ .../01555_system_distribution_queue_mask.sql | 36 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.reference create mode 100644 tests/queries/0_stateless/01555_system_distribution_queue_mask.sql diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference new file mode 100644 index 00000000000..bd0eac10816 --- /dev/null +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.reference @@ -0,0 +1,4 @@ +masked +3,"default:*@127%2E0%2E0%2E1:9000,default:*@127%2E0%2E0%2E2:9000" +no masking +1,"default@localhost:9000" diff --git a/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql new file mode 100644 index 00000000000..0143b8e46ed --- /dev/null +++ b/tests/queries/0_stateless/01555_system_distribution_queue_mask.sql @@ -0,0 +1,36 @@ +-- force data path 
with the user/pass in it +set use_compact_format_in_distributed_parts_names=0; +-- use async send even for localhost +set prefer_localhost_replica=0; + +drop table if exists dist_01555; +drop table if exists data_01555; +create table data_01555 (key Int) Engine=Null(); + +-- +-- masked +-- +SELECT 'masked'; +create table dist_01555 (key Int) Engine=Distributed(test_cluster_with_incorrect_pw, currentDatabase(), data_01555, key); + +insert into dist_01555 values (1)(2); +-- since test_cluster_with_incorrect_pw contains an incorrect password, ignore the error +system flush distributed dist_01555; -- { serverError 516; } +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; + +drop table dist_01555; + +-- +-- no masking +-- +SELECT 'no masking'; +create table dist_01555 (key Int) Engine=Distributed(test_shard_localhost, currentDatabase(), data_01555, key); + +insert into dist_01555 values (1)(2); +-- test_shard_localhost uses correct credentials, so the flush succeeds without error +system flush distributed dist_01555; +select length(splitByChar('*', data_path)), replaceRegexpOne(data_path, '^.*/([^/]*)/' , '\\1') from system.distribution_queue where database = currentDatabase() and table = 'dist_01555' format CSV; + +-- cleanup +drop table dist_01555; +drop table data_01555; From a333f9f6f4dca607217bd74e2a55535870a75338 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 8 Nov 2020 14:21:58 +0800 Subject: [PATCH 86/92] Update tests/queries/0_stateless/01540_verbatim_partition_pruning.sql Co-authored-by: Azat Khuzhin --- .../0_stateless/01540_verbatim_partition_pruning.sql | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql index bde90422dbc..7695d20fdee 100644 ---
a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql @@ -11,4 +11,15 @@ select * from xy where intHash64(x) % 100 = intHash64(1) % 100; -- This works too select * from xy where x = 1; +-- +-- Test for equality. +-- It is special operator that treated as an always monotonic +-- +set max_rows_to_read=100; +drop table if exists xy; +create table xy(x int, y int) engine MergeTree partition by x % 100 order by y settings index_granularity = 4096; +-- insert enough data to make minmax index not enough for partition prunning +insert into xy select number, number from numbers(10000); +select * from xy where x = 1; + drop table if exists xy; From ecf10817de6bc08b4e9a21a1f33b3d435094bd37 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 8 Nov 2020 11:21:07 +0300 Subject: [PATCH 87/92] Fix clang-tidy checks in Cluster.cpp --- src/Interpreters/Cluster.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index 61ad4258b90..218502e7f43 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -387,14 +387,14 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, { /// use_compact_format=0 { - auto dir_name = replica_addresses.back().toFullString(0 /* use_compact_format */); + auto dir_name = replica_addresses.back().toFullString(false /* use_compact_format */); if (!replica_addresses.back().is_local) concatInsertPath(insert_paths.prefer_localhost_replica, dir_name); concatInsertPath(insert_paths.no_prefer_localhost_replica, dir_name); } /// use_compact_format=1 { - auto dir_name = replica_addresses.back().toFullString(1 /* use_compact_format */); + auto dir_name = replica_addresses.back().toFullString(true /* use_compact_format */); if (!replica_addresses.back().is_local) concatInsertPath(insert_paths.prefer_localhost_replica_compact, dir_name); 
concatInsertPath(insert_paths.no_prefer_localhost_replica_compact, dir_name); @@ -620,7 +620,7 @@ const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool pr if (!has_internal_replication) throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); - auto & paths = insert_path_for_internal_replication; + const auto & paths = insert_path_for_internal_replication; if (!use_compact_format) { if (prefer_localhost_replica) From 46507c8cffba7ac11602b4dbfa561a6c8b716b27 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sun, 8 Nov 2020 17:11:02 +0800 Subject: [PATCH 88/92] Better test --- ...01540_verbatim_partition_pruning.reference | 5 ++-- .../01540_verbatim_partition_pruning.sql | 30 +++++++++---------- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference index 46256e7cdab..90888a9eaf5 100644 --- a/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.reference @@ -1,2 +1,3 @@ -1 2 -1 2 +2 3 +9 5 +8 4 diff --git a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql index 7695d20fdee..2ef9c9e8917 100644 --- a/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql +++ b/tests/queries/0_stateless/01540_verbatim_partition_pruning.sql @@ -1,25 +1,23 @@ drop table if exists xy; -create table xy(x int, y int) engine MergeTree partition by intHash64(x) % 100 order by y settings index_granularity = 1; +create table xy(x int, y int) engine MergeTree partition by intHash64(x) % 2 order by y settings index_granularity = 1; -insert into xy values (1, 2), (2, 3); +-- intHash64(0) % 2 = 0 +-- intHash64(2) % 2 = 1 +-- intHash64(8) % 2 = 0 +-- intHash64(9) % 2 = 1 +insert into xy values (0, 2), (2, 3), (8, 4), (9, 5); -SET max_rows_to_read 
= 1; +-- Now we have two partitions: 0 and 1, each of which contains 2 values. +-- minmax index for the first partition is 0 <= x <= 8 +-- minmax index for the second partition is 2 <= x <= 9 -select * from xy where intHash64(x) % 100 = intHash64(1) % 100; +SET max_rows_to_read = 2; --- This works too -select * from xy where x = 1; +select * from xy where intHash64(x) % 2 = intHash64(2) % 2; --- --- Test for equality. --- It is special operator that treated as an always monotonic --- -set max_rows_to_read=100; -drop table if exists xy; -create table xy(x int, y int) engine MergeTree partition by x % 100 order by y settings index_granularity = 4096; --- insert enough data to make minmax index not enough for partition prunning -insert into xy select number, number from numbers(10000); -select * from xy where x = 1; +-- Equality is another special operator that can be treated as an always monotonic indicator for deterministic functions. +-- minmax index is not enough. +select * from xy where x = 8; drop table if exists xy; From f07988eec79ac8ba38b34a417dc305e60919d39a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Nov 2020 21:17:30 +0300 Subject: [PATCH 89/92] Remove perf test of sumbur hash, because we do not care --- tests/performance/consistent_hashes.xml | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/performance/consistent_hashes.xml b/tests/performance/consistent_hashes.xml index 087187497ed..3610579f545 100644 --- a/tests/performance/consistent_hashes.xml +++ b/tests/performance/consistent_hashes.xml @@ -18,7 +18,4 @@ SELECT {hash_func}(number, {buckets}) FROM numbers(10000000) FORMAT Null - - - SELECT sumburConsistentHash(toUInt32(number), 2) FROM numbers(10000000) FORMAT Null From f4afb4643373aa1d750fc33afa33296d29716ce9 Mon Sep 17 00:00:00 2001 From: Anna Shakhova <72564598+annvsh@users.noreply.github.com> Date: Mon, 9 Nov 2020 02:42:28 +0700 Subject: [PATCH 90/92] DOCSUP-3123: Edited the ReplicatedMergeTree and Polygon dictionaries 
(#16110) * #13573 * #9278 * Fixed * Fixed * Fixed link * Update docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md * Update docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md Co-authored-by: BayoNet --- .../settings.md | 2 +- .../external-dicts-dict-polygon.md | 91 +++++++++++++++++++ .../settings.md | 16 ++++ .../external-dicts-dict-polygon.md | 2 +- 4 files changed, 109 insertions(+), 2 deletions(-) create mode 100644 docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 31a8e896438..a37ae685368 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -571,7 +571,7 @@ For more information, see the MergeTreeSettings.h header file. Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md). -This setting has higher priority. +This setting has a higher priority. For more information, see the MergeTreeSettings.h header file. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md new file mode 100644 index 00000000000..93b9b340e89 --- /dev/null +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -0,0 +1,91 @@ +--- +toc_priority: 46 +toc_title: Polygon Dictionaries With Grids +--- + + +# Polygon dictionaries {#polygon-dictionaries} + +Polygon dictionaries allow you to efficiently search for the polygon containing specified points. +For example: defining a city area by geographical coordinates. 
+ +Example configuration: + +``` xml + + + + key + Array(Array(Array(Array(Float64)))) + + + + name + String + + + + + value + UInt64 + 0 + + + + + + + + + +``` + +The corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md#create-dictionary-query): +``` sql +CREATE DICTIONARY polygon_dict_name ( + key Array(Array(Array(Array(Float64)))), + name String, + value UInt64 +) +PRIMARY KEY key +LAYOUT(POLYGON()) +... +``` + +When configuring the polygon dictionary, the key must have one of two types: +- A simple polygon. It is an array of points. +- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it. + +Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported. + +The user can [upload their own data](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md) in all formats supported by ClickHouse. + + +There are 3 types of [in-memory storage](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) available: + +- POLYGON_SIMPLE. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes. + +- POLYGON_INDEX_EACH. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions). +Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration. +The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters.
+The division stops when the recursion depth reaches MAX_DEPTH or when the cell crosses no more than MIN_INTERSECTIONS polygons. +To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately. + +- POLYGON_INDEX_CELL. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request. + +- POLYGON. Synonym to POLYGON_INDEX_CELL. + +Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries. +An important difference is that here the keys will be the points for which you want to find the polygon containing them. + +Example of working with the dictionary defined above: +``` sql +CREATE TABLE points ( + x Float64, + y Float64 +) +... +SELECT tuple(x, y) AS key, dictGet(dict_name, 'name', key), dictGet(dict_name, 'value', key) FROM points ORDER BY x, y; +``` + +As a result of executing the last command for each point in the 'points' table, a minimum area polygon containing this point will be found, and the requested attributes will be output. diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 2a23a27f4c0..2745718381b 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -555,6 +555,22 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ``` +## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree} + +Тонкая настройка таблиц в [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md). + +Эта настройка имеет более высокий приоритет. + +Подробнее смотрите в заголовочном файле MergeTreeSettings.h. 
+ +**Пример** + +``` xml + + 5 + +``` + ## openSSL {#server_configuration_parameters-openssl} Настройки клиента/сервера SSL. diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md index bfd8663a754..70839d21a78 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md @@ -1,4 +1,4 @@ -# Cловари полигонов {#slovari-polygonov} +# Cловари полигонов {#polygon-dictionaries} Словари полигонов позволяют эффективно искать полигон, в который попадают данные точки, среди множества полигонов. Для примера: определение района города по географическим координатам. From ff906369eafc8b56895b5b7c91e7cd0a7a4c79dd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 8 Nov 2020 23:22:38 +0300 Subject: [PATCH 91/92] Adjust time for perf test --- tests/performance/quantile_merge.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/quantile_merge.xml b/tests/performance/quantile_merge.xml index 7f4d85a254c..0ddb688d8eb 100644 --- a/tests/performance/quantile_merge.xml +++ b/tests/performance/quantile_merge.xml @@ -1,3 +1,3 @@ - SELECT quantileMerge(arrayJoin(arrayMap(x -> state, range(1000000)))) FROM (SELECT quantileState(rand()) AS state FROM numbers(10000)) + SELECT quantileMerge(arrayJoin(arrayMap(x -> state, range(500000)))) FROM (SELECT quantileState(rand()) AS state FROM numbers(10000)) From 4eb684603aeacb2fe637bc4551619213367342f3 Mon Sep 17 00:00:00 2001 From: sevirov <72220289+sevirov@users.noreply.github.com> Date: Mon, 9 Nov 2020 11:21:27 +0300 Subject: [PATCH 92/92] Update order-by.md (#16770) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Заново сделал изменения в английскую и русскую версии. 
Co-authored-by: Dmitriy --- .../statements/select/order-by.md | 82 +++++++++++++++++++ .../statements/select/order-by.md | 82 +++++++++++++++++++ 2 files changed, 164 insertions(+) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index a4e5e3655c6..57e071d6734 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -221,3 +221,85 @@ returns │ 1970-03-12 │ 1970-01-08 │ original │ └────────────┴────────────┴──────────┘ ``` + +## OFFSET FETCH Clause {#offset-fetch} + +`OFFSET` and `FETCH` allow you to retrieve data by portions. They specify a row block which you want to get by a single query. + +``` sql +OFFSET offset_row_count {ROW | ROWS}] [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}] +``` + +The `offset_row_count` or `fetch_row_count` value can be a number or a literal constant. You can omit `fetch_row_count`; by default, it equals 1. + +`OFFSET` specifies the number of rows to skip before starting to return rows from the query. + +The `FETCH` specifies the maximum number of rows that can be in the result of a query. + +The `ONLY` option is used to return rows that immediately follow the rows omitted by the `OFFSET`. In this case the `FETCH` is an alternative to the [LIMIT](../../../sql-reference/statements/select/limit.md) clause. For example, the following query + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY; +``` + +is identical to the query + +``` sql +SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1; +``` + +The `WITH TIES` option is used to return any additional rows that tie for the last place in the result set according to the `ORDER BY` clause. For example, if `fetch_row_count` is set to 5 but two additional rows match the values of the `ORDER BY` columns in the fifth row, the result set will contain seven rows. + +!!! 
note "Note" + According to the standard, the `OFFSET` clause must come before the `FETCH` clause if both are present. + +### Examples {#examples} + +Input table: + +``` text +┌─a─┬─b─┐ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 1 │ 3 │ +│ 5 │ 4 │ +│ 0 │ 6 │ +│ 5 │ 7 │ +└───┴───┘ +``` + +Usage of the `ONLY` option: + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY; +``` + +Result: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 5 │ 4 │ +└───┴───┘ +``` + +Usage of the `WITH TIES` option: + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES; +``` + +Result: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 5 │ 4 │ +│ 5 │ 7 │ +└───┴───┘ +``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/order-by/) diff --git a/docs/ru/sql-reference/statements/select/order-by.md b/docs/ru/sql-reference/statements/select/order-by.md index 0476c5da5af..ea0f40b2dc0 100644 --- a/docs/ru/sql-reference/statements/select/order-by.md +++ b/docs/ru/sql-reference/statements/select/order-by.md @@ -214,3 +214,85 @@ ORDER BY │ 1970-03-12 │ 1970-01-08 │ original │ └────────────┴────────────┴──────────┘ ``` + +## Секция OFFSET FETCH {#offset-fetch} + +`OFFSET` и `FETCH` позволяют извлекать данные по частям. Они указывают строки, которые вы хотите получить в результате запроса. + +``` sql +OFFSET offset_row_count {ROW | ROWS}] [FETCH {FIRST | NEXT} fetch_row_count {ROW | ROWS} {ONLY | WITH TIES}] +``` + +`offset_row_count` или `fetch_row_count` может быть числом или литеральной константой. Если вы не используете `fetch_row_count`, то его значение равно 1. + +`OFFSET` указывает количество строк, которые необходимо пропустить перед началом возврата строк из запроса. + +`FETCH` указывает максимальное количество строк, которые могут быть получены в результате запроса. + +Опция `ONLY` используется для возврата строк, которые следуют сразу же за строками, пропущенными секцией `OFFSET`. 
В этом случае `FETCH` — это альтернатива [LIMIT](../../../sql-reference/statements/select/limit.md). Например, следующий запрос + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 1 ROW FETCH FIRST 3 ROWS ONLY; +``` + +идентичен запросу + +``` sql +SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1; +``` + +Опция `WITH TIES` используется для возврата дополнительных строк, которые привязываются к последней в результате запроса. Например, если `fetch_row_count` имеет значение 5 и существуют еще 2 строки с такими же значениями столбцов, указанных в `ORDER BY`, что и у пятой строки результата, то финальный набор будет содержать 7 строк. + +!!! note "Примечание" + Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют. + +### Примеры {#examples} + +Входная таблица: + +``` text +┌─a─┬─b─┐ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 1 │ 3 │ +│ 5 │ 4 │ +│ 0 │ 6 │ +│ 5 │ 7 │ +└───┴───┘ +``` + +Использование опции `ONLY`: + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS ONLY; +``` + +Результат: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 5 │ 4 │ +└───┴───┘ +``` + +Использование опции `WITH TIES`: + +``` sql +SELECT * FROM test_fetch ORDER BY a OFFSET 3 ROW FETCH FIRST 3 ROWS WITH TIES; +``` + +Результат: + +``` text +┌─a─┬─b─┐ +│ 2 │ 1 │ +│ 3 │ 4 │ +│ 5 │ 4 │ +│ 5 │ 7 │ +└───┴───┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/order-by/)