From dd3ce02644b1c2abc290cd988e52a831922316e5 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Mon, 19 Aug 2019 12:53:23 +0300 Subject: [PATCH 001/222] Typo fix. --- docs/en/query_language/functions/functions_for_nulls.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/functions/functions_for_nulls.md b/docs/en/query_language/functions/functions_for_nulls.md index 4b7257fd4b3..41fec479d0d 100644 --- a/docs/en/query_language/functions/functions_for_nulls.md +++ b/docs/en/query_language/functions/functions_for_nulls.md @@ -241,7 +241,7 @@ SHOW CREATE TABLE t_null └───┴──────┘ ``` -Apply the `resumenotnull` function to the `y` column. +Apply the `assumeNotNull` function to the `y` column. ``` SELECT assumeNotNull(y) FROM t_null From 4cdb4d5ff229ccfb56653c8770a13e1024e186c8 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 10 Sep 2019 13:09:36 +0300 Subject: [PATCH 002/222] Links fix. --- docs/en/operations/system_tables.md | 6 +++--- docs/en/query_language/system.md | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md index 4e3386764fd..0b6481de3c1 100644 --- a/docs/en/operations/system_tables.md +++ b/docs/en/operations/system_tables.md @@ -64,9 +64,9 @@ Please note that `errors_count` is updated once per query to the cluster, but `e ** See also ** -- [Table engine Distributed](../../operations/table_engines/distributed.md) -- [distributed_replica_error_cap setting](../settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../settings/settings.md#settings-distributed_replica_error_half_life) +- [Table engine Distributed](table_engines/distributed.md) +- [distributed_replica_error_cap setting](settings/settings.md#settings-distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](settings/settings.md#settings-distributed_replica_error_half_life) ## system.columns diff --git a/docs/en/query_language/system.md b/docs/en/query_language/system.md index 648aa07f5e7..3ef504e46b3 100644 --- a/docs/en/query_language/system.md +++ b/docs/en/query_language/system.md @@ -15,7 +15,7 @@ ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} Reloads all dictionaries that have been successfully loaded before. -By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../operations/server_settings/settings.md#dictionaries-lazy-load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). +By default, dictionaries are loaded lazily (see [dictionaries_lazy_load](../operations/server_settings/settings.md#server_settings-dictionaries_lazy_load)), so instead of being loaded automatically at startup, they are initialized on first access through dictGet function or SELECT from tables with ENGINE = Dictionary. The `SYSTEM RELOAD DICTIONARIES` query reloads such dictionaries (LOADED). Always returns `Ok.` regardless of the result of the dictionary update. ## RELOAD DICTIONARY dictionary_name {#query_language-system-reload-dictionary} From 92b3183bceee9cd70a783bccbf4dfff5dd95d066 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 10 Sep 2019 14:27:20 +0300 Subject: [PATCH 003/222] Fixed links in docs. 
--- docs/ru/interfaces/formats.md | 2 +- docs/toc_ru.yml | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index fc28d97ecb9..5b26d23d80a 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -936,7 +936,7 @@ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parq clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../../operations/table_engines/hdfs.md) и `URL`. +Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../operations/table_engines/hdfs.md) и `URL`. ## Схема формата {#formatschema} diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index 2b7a7f156ab..b21bcc838dc 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -88,6 +88,7 @@ nav: - 'MySQL': 'operations/table_engines/mysql.md' - 'JDBC': 'operations/table_engines/jdbc.md' - 'ODBC': 'operations/table_engines/odbc.md' + - 'HDFS': 'operations/table_engines/hdfs.md' - 'Особые': - 'Distributed': 'operations/table_engines/distributed.md' - 'Внешние данные': 'operations/table_engines/external_data.md' @@ -159,6 +160,7 @@ nav: - 'mysql': 'query_language/table_functions/mysql.md' - 'jdbc': 'query_language/table_functions/jdbc.md' - 'odbc': 'query_language/table_functions/odbc.md' + - 'hdfs': 'query_language/table_functions/hdfs.md' - 'input': 'query_language/table_functions/input.md' - 'Словари': - 'Введение': 'query_language/dicts/index.md' From 16e3428891c02f0a61c25b4f7a5aacf1b9417e52 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 10 Sep 2019 14:30:32 +0300 Subject: [PATCH 004/222] More fixes. --- docs/ru/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 5b26d23d80a..9acf2d67e4a 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -936,7 +936,7 @@ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parq clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq} ``` -Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [`HDFS`](../operations/table_engines/hdfs.md) и `URL`. +Для обмена данными с экосистемой Hadoop можно использовать движки таблиц [HDFS](../operations/table_engines/hdfs.md) и `URL`. ## Схема формата {#formatschema} From 4576e1f4b2a11b166e074542d6425d8265dd7ba7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 13 Sep 2019 11:59:46 +0300 Subject: [PATCH 005/222] Enable Processors by default. --- dbms/src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 0678aaeedc6..5f23c0a4be8 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -338,7 +338,7 @@ struct Settings : public SettingsCollection M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only for 'mysql' table function.") \
     M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
     \
-    M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
+    M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \
     \
     M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
     M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \

From 1335aa75d795f0dc04af7a55d1f83ce1f27ee5c8 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 13 Sep 2019 15:34:05 +0300
Subject: [PATCH 006/222] Added TreeExecutor.

---
 .../src/Processors/Executors/TreeExecutor.cpp | 157 ++++++++++++++++++
 dbms/src/Processors/Executors/TreeExecutor.h  |  28 ++++
 2 files changed, 185 insertions(+)
 create mode 100644 dbms/src/Processors/Executors/TreeExecutor.cpp
 create mode 100644 dbms/src/Processors/Executors/TreeExecutor.h

diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp
new file mode 100644
index 00000000000..d7fc1b78ede
--- /dev/null
+++ b/dbms/src/Processors/Executors/TreeExecutor.cpp
@@ -0,0 +1,157 @@
+#include <Processors/Executors/TreeExecutor.h>
+#include <stack>

namespace DB
{

static void checkProcessorHasSingleOutput(IProcessor * processor)
{
    size_t num_outputs = processor->getOutputs().size();
    if (num_outputs != 1)
        throw Exception("All processors in TreeExecutor must have a single output, "
                        "but processor with name " + processor->getName() + " has " + std::to_string(num_outputs),
                        ErrorCodes::LOGICAL_ERROR);
}

static void validateTree(const Processors & processors, IProcessor * root)
{
    std::unordered_map<IProcessor *, size_t> index;

    for (auto & processor : processors)
    {
        bool is_inserted = index.try_emplace(processor.get(), index.size()).second;

        if (!is_inserted)
            throw Exception("Duplicate processor in TreeExecutor with name " + processor->getName(),
                            ErrorCodes::LOGICAL_ERROR);
    }

    std::vector<bool> is_visited(processors.size(), false);
    std::stack<IProcessor *> stack;

    stack.push(root);

    while (!stack.empty())
    {
        IProcessor * node = stack.top();
        stack.pop();

        auto it = index.find(node);

        if (it == index.end())
            throw Exception("Processor with name " + node->getName() + " "
                            "was not mentioned in list passed to TreeExecutor, "
                            "but was traversed to from other processors.", ErrorCodes::LOGICAL_ERROR);

        size_t position = it->second;

        if (is_visited[position])
            throw Exception("Processor with name " + node->getName() + " was visited twice while traversing in TreeExecutor. "
                            "Passed processors are not tree.", ErrorCodes::LOGICAL_ERROR);

        is_visited[position] = true;

        checkProcessorHasSingleOutput(node);

        auto & children = node->getInputs();
        for (auto & child : children)
            stack.push(&child.getOutputPort().getProcessor());
    }

    for (size_t i = 0; i < is_visited.size(); ++i)
        if (!is_visited[i])
            throw Exception("Processor with name " + processors[i]->getName() +
                            " was not visited by traversal in TreeExecutor.", ErrorCodes::LOGICAL_ERROR);
}

void TreeExecutor::init()
{
    if (processors.empty())
        throw Exception("No processors were passed to TreeExecutor.", ErrorCodes::LOGICAL_ERROR);

    root = processors.back().get();

    validateTree(processors, root);

    port = std::make_unique<InputPort>(getHeader(), root);
    connect(root->getOutputs().front(), *port);
    port->setNeeded();
}

void TreeExecutor::execute()
{
    std::stack<IProcessor *> stack;
    stack.push(root);

    while (!stack.empty())
    {
        IProcessor * node = stack.top();

        auto status = node->prepare();

        switch (status)
        {
            case IProcessor::Status::NeedData:
            {
                auto & inputs = node->getInputs();

                if (inputs.empty())
                    throw Exception("Processor " + node->getName() + " with empty input "
                                    "has returned NeedData in TreeExecutor", ErrorCodes::LOGICAL_ERROR);

                bool all_finished = true;

                for (auto & input : inputs)
                {
                    if (input.isFinished())
                        continue;

                    all_finished = false;

                    stack.push(&input.getOutputPort().getProcessor());
                }

                if (all_finished)
                    throw Exception("Processor " + node->getName() + " has returned NeedData in TreeExecutor, "
                                    "but all its inputs are finished.", ErrorCodes::LOGICAL_ERROR);
                break;
            }
            case IProcessor::Status::PortFull:
            {
                stack.pop();
                break;
            }
            case IProcessor::Status::Finished:
            {
                stack.pop();
                break;
            }
            case IProcessor::Status::Ready:
            {
                node->work();
                break;
            }
            case IProcessor::Status::Async:
            case IProcessor::Status::Wait:
            case IProcessor::Status::ExpandPipeline:
            {
                throw Exception("Processor with name " + node->getName() + " "
                                "returned status " + IProcessor::statusToName(status) + " "
                                "which is not supported in TreeExecutor.", ErrorCodes::LOGICAL_ERROR);
            }
        }
    }
}

Block TreeExecutor::readImpl()
{
    while (true)
    {
        if (port->isFinished())
            return {};

        if (port->hasData())
            return getHeader().cloneWithColumns(port->pull().detachColumns());

        execute();
    }
}

}
diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h
new file mode 100644
index 00000000000..0aad5b3024a
--- /dev/null
+++ b/dbms/src/Processors/Executors/TreeExecutor.h
@@ -0,0 +1,28 @@
+#pragma once
+#include <Processors/IProcessor.h>
+#include <DataStreams/IBlockInputStream.h>

namespace DB
{

class TreeExecutor : public IBlockInputStream
{
public:
    explicit TreeExecutor(Processors processors_) : processors(std::move(processors_)) { init(); }

    String getName() const override { return root->getName(); }
    Block getHeader() const override { return root->getOutputs().front().getHeader(); }

protected:
    Block readImpl() override;

private:
    Processors processors;
    IProcessor * root = nullptr;
    std::unique_ptr<InputPort> port;

    void init();
    void execute();
};

}

From 1f5e62d741bd43c8bc24d838d34cf1a3e85efc71 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 13 Sep 2019 15:59:48 +0300
Subject: [PATCH 007/222] Added IStorage::readWithProcessors.
---
 dbms/src/Storages/IStorage.cpp | 21 +++++++++++++++++++++
 dbms/src/Storages/IStorage.h   | 13 +++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp
index cbd14666006..cd4b4f2e362 100644
--- a/dbms/src/Storages/IStorage.cpp
+++ b/dbms/src/Storages/IStorage.cpp
@@ -4,6 +4,8 @@
 #include 
 #include 
 
+#include <Processors/Executors/TreeExecutor.h>
+
 #include 
 #include 
 
@@ -423,4 +425,23 @@ void IStorage::alter(
     }
 }
 
+BlockInputStreams IStorage::read(
+    const Names & column_names,
+    const SelectQueryInfo & query_info,
+    const Context & context,
+    QueryProcessingStage::Enum processed_stage,
+    size_t max_block_size,
+    unsigned num_streams)
+{
+    auto pipes = readWithProcessors(column_names, query_info, context, processed_stage, max_block_size, num_streams);
+
+    BlockInputStreams res;
+    res.reserve(pipes.size());
+
+    for (auto & pipe : pipes)
+        res.emplace_back(std::make_shared<TreeExecutor>(std::move(pipe)));
+
+    return res;
+}
+
 }
diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h
index d92b06029d8..11fcaad1a03 100644
--- a/dbms/src/Storages/IStorage.h
+++ b/dbms/src/Storages/IStorage.h
@@ -41,6 +41,11 @@ class AlterCommands;
 class MutationCommands;
 class PartitionCommands;
 
+class IProcessor;
+using ProcessorPtr = std::shared_ptr<IProcessor>;
+using Processors = std::vector<ProcessorPtr>;
+using Pipes = std::vector<Processors>;
+
 struct ColumnSize
 {
     size_t marks = 0;
@@ -234,6 +239,14 @@ public:
      * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
      */
     virtual BlockInputStreams read(
+        const Names & /*column_names*/,
+        const SelectQueryInfo & /*query_info*/,
+        const Context & /*context*/,
+        QueryProcessingStage::Enum /*processed_stage*/,
+        size_t /*max_block_size*/,
+        unsigned /*num_streams*/);
+
+    virtual Pipes readWithProcessors(
         const Names & /*column_names*/,
         const SelectQueryInfo & /*query_info*/,
         const Context & /*context*/,

From 3c53dfd227be24a4f3189e15ee94db7d5b047e57 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Fri, 13 Sep 2019 18:41:09 +0300
Subject: [PATCH 008/222] Add processors to StorageMergeTree [WIP].
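
The merge-tree source now derives from ISource and produces Chunk instead of
Block: a Chunk is just a set of columns plus an explicit row count, with no
names or types attached, so the header is computed once in the constructor
(the getHeader(...) call passed to ISource) and travels separately. A small
self-contained illustration of the Chunk API this patch relies on (include
paths assumed from this source tree):

    #include <Processors/Chunk.h>
    #include <Columns/ColumnsNumber.h>

    using namespace DB;

    Chunk makeExampleChunk()
    {
        /// One UInt64 column with three rows.
        auto column = ColumnUInt64::create();
        for (UInt64 i = 0; i < 3; ++i)
            column->insertValue(i);

        Columns columns;
        columns.emplace_back(std::move(column));
        return Chunk(std::move(columns), 3);
    }

Converting back to a Block requires the header, which is exactly what
TreeExecutor::readImpl() does with
getHeader().cloneWithColumns(port->pull().detachColumns()).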
--- .../MergeTreeBaseSelectBlockInputStream.cpp | 107 ++++++++++++++---- .../MergeTreeBaseSelectBlockInputStream.h | 22 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 14 +-- .../MergeTree/MergeTreeDataSelectExecutor.h | 10 +- .../Storages/MergeTree/MergeTreeRangeReader.h | 2 +- .../MergeTreeThreadSelectBlockInputStream.cpp | 14 +-- .../MergeTreeThreadSelectBlockInputStream.h | 4 +- dbms/src/Storages/StorageMergeTree.cpp | 2 +- dbms/src/Storages/StorageMergeTree.h | 2 +- 9 files changed, 112 insertions(+), 65 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 0489182fe55..077e3ea0712 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -19,7 +19,8 @@ namespace ErrorCodes } -MergeTreeBaseSelectBlockInputStream::MergeTreeBaseSelectBlockInputStream( +MergeTreeBaseSelectBlockInputProcessor::MergeTreeBaseSelectBlockInputProcessor( + Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, UInt64 max_block_size_rows_, @@ -31,6 +32,7 @@ MergeTreeBaseSelectBlockInputStream::MergeTreeBaseSelectBlockInputStream( bool save_marks_in_cache_, const Names & virt_column_names_) : + ISource(getHeader(std::move(header), prewhere_info_, virt_column_names_)), storage(storage_), prewhere_info(prewhere_info_), max_block_size_rows(max_block_size_rows_), @@ -45,26 +47,27 @@ MergeTreeBaseSelectBlockInputStream::MergeTreeBaseSelectBlockInputStream( } -Block MergeTreeBaseSelectBlockInputStream::readImpl() +Chunk MergeTreeBaseSelectBlockInputProcessor::generate() { - Block res; - - while (!res && !isCancelled()) + while (!isCancelled()) { if ((!task || task->isFinished()) && !getNewTask()) - break; + return {}; - res = readFromPart(); + auto res = readFromPart(); - if (res) - injectVirtualColumns(res); + if (!res.hasNoRows()) + { + injectVirtualColumns(res, task.get(), virt_column_names); + return res; + } } - return res; + return {}; } -void MergeTreeBaseSelectBlockInputStream::initializeRangeReaders(MergeTreeReadTask & current_task) +void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeReadTask & current_task) { if (prewhere_info) { @@ -103,7 +106,7 @@ void MergeTreeBaseSelectBlockInputStream::initializeRangeReaders(MergeTreeReadTa } -Block MergeTreeBaseSelectBlockInputStream::readFromPartImpl() +Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() { if (task->size_predictor) task->size_predictor->startBlock(); @@ -160,7 +163,8 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPartImpl() UInt64 num_filtered_rows = read_result.numReadRows() - read_result.block.rows(); - progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); + /// TODO + /// progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); if (task->size_predictor) { @@ -177,13 +181,14 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPartImpl() column.column = column.column->convertToFullColumnIfConst(); } - read_result.block.checkNumberOfRows(); + UInt64 num_rows = read_result.columns.empty() ? 
0 + : read_result.columns[0]->size(); - return read_result.block; + return Chunk(std::move(read_result.columns), num_rows); } -Block MergeTreeBaseSelectBlockInputStream::readFromPart() +Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPart() { if (!task->range_reader.isInitialized()) initializeRangeReaders(*task); @@ -192,15 +197,18 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart() } -void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) const +template +static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, MergeTreeReadTask * task, const Names & virtual_columns) { /// add virtual columns /// Except _sample_factor, which is added from the outside. - if (!virt_column_names.empty()) + if (!virtual_columns.empty()) { - const auto rows = block.rows(); + if (unlikely(rows && !task)) + throw Exception("Cannot insert virtual columns to non-empty chunk without specified task.", + ErrorCodes::LOGICAL_ERROR); - for (const auto & virt_column_name : virt_column_names) + for (const auto & virt_column_name : virtual_columns) { if (virt_column_name == "_part") { @@ -210,7 +218,7 @@ void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) co else column = DataTypeString().createColumn(); - block.insert({ column, std::make_shared(), virt_column_name}); + callback.template insert(column, virt_column_name); } else if (virt_column_name == "_part_index") { @@ -220,7 +228,7 @@ void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) co else column = DataTypeUInt64().createColumn(); - block.insert({ column, std::make_shared(), virt_column_name}); + callback.template insert(column, virt_column_name); } else if (virt_column_name == "_partition_id") { @@ -230,14 +238,55 @@ void MergeTreeBaseSelectBlockInputStream::injectVirtualColumns(Block & block) co else column = DataTypeString().createColumn(); - block.insert({ column, std::make_shared(), virt_column_name}); + callback.template insert(column, virt_column_name); } } } } +namespace +{ + struct InsertIntoBlockCallback + { + template + void insert(const ColumnPtr & column, const String & name) + { + block.insert({column, std::make_shared(), name}); + } -void MergeTreeBaseSelectBlockInputStream::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) + Block & block; + }; + + struct InsertIntoColumnsCallback + { + template + void insert(const ColumnPtr & column, const String &) + { + columns.push_back(column); + } + + Columns & columns; + }; +} + +void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns) +{ + InsertIntoBlockCallback callback { block }; + injectVirtualColumnsImpl(block.rows(), callback, task, virtual_columns); +} + +void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns) +{ + UInt64 num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + InsertIntoColumnsCallback callback { columns }; + injectVirtualColumnsImpl(num_rows, callback, task, virtual_columns); + + chunk.setColumns(columns, num_rows); +} + +void MergeTreeBaseSelectBlockInputProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) { if (prewhere_info) { @@ -253,7 +302,15 @@ void MergeTreeBaseSelectBlockInputStream::executePrewhereActions(Block & block, } } +Block MergeTreeBaseSelectBlockInputProcessor::getHeader( + Block block, const PrewhereInfoPtr & 
prewhere_info, const Names & virtual_columns) +{ + executePrewhereActions(block, prewhere_info); + injectVirtualColumns(block, nullptr, virtual_columns); + return block; +} -MergeTreeBaseSelectBlockInputStream::~MergeTreeBaseSelectBlockInputStream() = default; + +MergeTreeBaseSelectBlockInputProcessor::~MergeTreeBaseSelectBlockInputProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h index 640f73652e4..0abbb2d001c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h @@ -5,6 +5,8 @@ #include #include +#include + namespace DB { @@ -14,10 +16,11 @@ class MarkCache; /// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream -class MergeTreeBaseSelectBlockInputStream : public IBlockInputStream +class MergeTreeBaseSelectBlockInputProcessor : public ISource { public: - MergeTreeBaseSelectBlockInputStream( + MergeTreeBaseSelectBlockInputProcessor( + Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, UInt64 max_block_size_rows_, @@ -29,24 +32,23 @@ public: bool save_marks_in_cache_ = true, const Names & virt_column_names_ = {}); - ~MergeTreeBaseSelectBlockInputStream() override; + ~MergeTreeBaseSelectBlockInputProcessor() override; static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info); protected: - Block readImpl() final; + Chunk generate() final; /// Creates new this->task, and initilizes readers virtual bool getNewTask() = 0; - /// We will call progressImpl manually. - void progress(const Progress &) override {} + virtual Chunk readFromPart(); - virtual Block readFromPart(); + Chunk readFromPartImpl(); - Block readFromPartImpl(); - - void injectVirtualColumns(Block & block) const; + static void injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns); + static void injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns); + static Block getHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns); void initializeRangeReaders(MergeTreeReadTask & task); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 40dc0bf6b52..95f76a4c7f7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -141,7 +141,7 @@ static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, siz } -BlockInputStreams MergeTreeDataSelectExecutor::read( +Pipes MergeTreeDataSelectExecutor::read( const Names & column_names_to_return, const SelectQueryInfo & query_info, const Context & context, @@ -154,7 +154,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::read( max_block_size, num_streams, max_block_numbers_to_read); } -BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( +Pipes MergeTreeDataSelectExecutor::readFromParts( MergeTreeData::DataPartsVector parts, const Names & column_names_to_return, const SelectQueryInfo & query_info, @@ -565,7 +565,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges); ProfileEvents::increment(ProfileEvents::SelectedMarks, sum_marks); - BlockInputStreams res; + Pipes res; if (select.final()) { @@ -658,7 
+658,7 @@ size_t roundRowsOrBytesToMarks( } -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( +Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -707,7 +707,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - BlockInputStreams res; + Pipes res; if (sum_marks > 0 && settings.merge_tree_uniform_read_distribution == 1) { @@ -817,7 +817,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( return res; } -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( +Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -1026,7 +1026,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithO } -BlockInputStreams MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( +Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, const Names & column_names, UInt64 max_block_size, diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 44857799d01..9b46b663ab2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -24,7 +24,7 @@ public: */ using PartitionIdToMaxBlock = std::unordered_map; - BlockInputStreams read( + Pipes read( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, @@ -32,7 +32,7 @@ public: unsigned num_streams, const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const; - BlockInputStreams readFromParts( + Pipes readFromParts( MergeTreeData::DataPartsVector parts, const Names & column_names, const SelectQueryInfo & query_info, @@ -46,7 +46,7 @@ private: Logger * log; - BlockInputStreams spreadMarkRangesAmongStreams( + Pipes spreadMarkRangesAmongStreams( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -56,7 +56,7 @@ private: const Names & virt_columns, const Settings & settings) const; - BlockInputStreams spreadMarkRangesAmongStreamsWithOrder( + Pipes spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, size_t num_streams, const Names & column_names, @@ -67,7 +67,7 @@ private: const Names & virt_columns, const Settings & settings) const; - BlockInputStreams spreadMarkRangesAmongStreamsFinal( + Pipes spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, const Names & column_names, UInt64 max_block_size, diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index 0eae69ee17e..4261509d7fc 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -157,7 +157,7 @@ public: void addNumBytesRead(size_t count) { num_bytes_read += count; } - Block block; + Columns columns; private: RangesInfo started_ranges; diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp index 69cf173212d..cd6efa6b7d1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp @@ -20,7 +20,7 
@@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( const Settings & settings, const Names & virt_column_names_) : - MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_, + MergeTreeBaseSelectBlockInputProcessor{pool->getHeader(), storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, thread{thread_}, @@ -38,19 +38,9 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( else min_marks_to_read = min_marks_to_read_; - ordered_names = getHeader().getNames(); + ordered_names = getPort().getHeader().getNames(); } - -Block MergeTreeThreadSelectBlockInputStream::getHeader() const -{ - auto res = pool->getHeader(); - executePrewhereActions(res, prewhere_info); - injectVirtualColumns(res); - return res; -} - - /// Requests read task from MergeTreeReadPool and signals whether it got one bool MergeTreeThreadSelectBlockInputStream::getNewTask() { diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h index 3c7dfb7927d..9603d21fb33 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h @@ -11,7 +11,7 @@ class MergeTreeReadPool; /** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked * to perform. */ -class MergeTreeThreadSelectBlockInputStream : public MergeTreeBaseSelectBlockInputStream +class MergeTreeThreadSelectBlockInputStream : public MergeTreeBaseSelectBlockInputProcessor { public: MergeTreeThreadSelectBlockInputStream( @@ -31,8 +31,6 @@ public: ~MergeTreeThreadSelectBlockInputStream() override; - Block getHeader() const override; - protected: /// Requests read task from MergeTreeReadPool and signals whether it got one bool getNewTask() override; diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 77c5a909f0c..bc50dec5b72 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -121,7 +121,7 @@ StorageMergeTree::~StorageMergeTree() shutdown(); } -BlockInputStreams StorageMergeTree::read( +Pipes StorageMergeTree::readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 04b20fda5b9..6d55b4655ce 100644 --- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -35,7 +35,7 @@ public: bool supportsIndexForIn() const override { return true; } - BlockInputStreams read( + Pipes readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, From 5108ebeece9db5b7146690e0047c3f9d8c7f6a4f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 23 Sep 2019 22:22:02 +0300 Subject: [PATCH 009/222] Remove Block from RangeReader. 
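
RangeReader and MergeTreeReader now pass around a bare Columns vector that is
positionally aligned with the reader's column list instead of a named Block;
a nullptr entry means the column has not been read at this step. Filtering
therefore applies one shared IColumn::Filter to every present column, as in
the new filterColumns(). A standalone sketch of that pattern, restated here
for clarity (not a new API):

    using namespace DB;

    void filterPresentColumns(Columns & columns, const IColumn::Filter & filter)
    {
        for (auto & column : columns)
        {
            if (!column)
                continue;   /// Not read at this step: nothing to filter.

            /// -1: no hint about the result size.
            column = column->filter(filter, -1);

            if (column->empty())
            {
                /// All rows filtered out: drop the columns entirely; the
                /// caller recovers the row count from the filter itself.
                columns.clear();
                break;
            }
        }
    }

When every column is filtered away, the remaining row count is recomputed via
countBytesInFilter(), which is why executePrewhereActionsAndFilterColumns()
caches that value behind the getNumBytesInFilter lambda.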
--- .../MergeTreeBaseSelectBlockInputStream.cpp | 36 +-- .../MergeTree/MergeTreeRangeReader.cpp | 257 +++++++++++------- .../Storages/MergeTree/MergeTreeRangeReader.h | 19 +- .../Storages/MergeTree/MergeTreeReader.cpp | 189 +++++++------ dbms/src/Storages/MergeTree/MergeTreeReader.h | 28 +- 5 files changed, 296 insertions(+), 233 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 077e3ea0712..731624d1997 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -158,10 +158,10 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() auto read_result = task->range_reader.read(rows_to_read, task->mark_ranges); /// All rows were filtered. Repeat. - if (read_result.block.rows() == 0) - read_result.block.clear(); + if (read_result.num_rows == 0) + read_result.columns.clear(); - UInt64 num_filtered_rows = read_result.numReadRows() - read_result.block.rows(); + UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows; /// TODO /// progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); @@ -170,21 +170,11 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() { task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows); - if (read_result.block) - task->size_predictor->update(read_result.block); + if (!read_result.columns.empty()) + task->size_predictor->update(read_result.columns); } - if (read_result.block && prewhere_info && !task->remove_prewhere_column) - { - /// Convert const column to full here because it's cheaper to filter const column than full. - auto & column = read_result.block.getByName(prewhere_info->prewhere_column_name); - column.column = column.column->convertToFullColumnIfConst(); - } - - UInt64 num_rows = read_result.columns.empty() ? 
0 - : read_result.columns[0]->size(); - - return Chunk(std::move(read_result.columns), num_rows); + return Chunk(std::move(read_result.columns), read_result.num_rows); } @@ -208,9 +198,9 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer throw Exception("Cannot insert virtual columns to non-empty chunk without specified task.", ErrorCodes::LOGICAL_ERROR); - for (const auto & virt_column_name : virtual_columns) + for (const auto & virtual_column_name : virtual_columns) { - if (virt_column_name == "_part") + if (virtual_column_name == "_part") { ColumnPtr column; if (rows) @@ -218,9 +208,9 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virt_column_name); + callback.template insert(column, virtual_column_name); } - else if (virt_column_name == "_part_index") + else if (virtual_column_name == "_part_index") { ColumnPtr column; if (rows) @@ -228,9 +218,9 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeUInt64().createColumn(); - callback.template insert(column, virt_column_name); + callback.template insert(column, virtual_column_name); } - else if (virt_column_name == "_partition_id") + else if (virtual_column_name == "_partition_id") { ColumnPtr column; if (rows) @@ -238,7 +228,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virt_column_name); + callback.template insert(column, virtual_column_name); } } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 932721eb028..99d83789f45 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -27,11 +27,11 @@ size_t MergeTreeRangeReader::DelayedStream::position() const return num_rows_before_current_mark + current_offset + num_delayed_rows; } -size_t MergeTreeRangeReader::DelayedStream::readRows(Block & block, size_t num_rows) +size_t MergeTreeRangeReader::DelayedStream::readRows(Columns & columns, size_t num_rows) { if (num_rows) { - size_t rows_read = merge_tree_reader->readRows(current_mark, continue_reading, num_rows, block); + size_t rows_read = merge_tree_reader->readRows(current_mark, continue_reading, num_rows, columns); continue_reading = true; /// Zero rows_read maybe either because reading has finished @@ -47,7 +47,7 @@ size_t MergeTreeRangeReader::DelayedStream::readRows(Block & block, size_t num_r return 0; } -size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark, size_t offset, size_t num_rows) +size_t MergeTreeRangeReader::DelayedStream::read(Columns & columns, size_t from_mark, size_t offset, size_t num_rows) { size_t num_rows_before_from_mark = index_granularity->getMarkStartingRow(from_mark); /// We already stand accurately in required position, @@ -60,7 +60,7 @@ size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark } else { - size_t read_rows = finalize(block); + size_t read_rows = finalize(columns); continue_reading = false; current_mark = from_mark; @@ -71,7 +71,7 @@ size_t MergeTreeRangeReader::DelayedStream::read(Block & block, size_t from_mark } } -size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) +size_t MergeTreeRangeReader::DelayedStream::finalize(Columns & columns) { /// We need to 
skip some rows before reading if (current_offset && !continue_reading) @@ -89,13 +89,14 @@ size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) } - /// Skip some rows from beging of granule + /// Skip some rows from begin of granule. /// We don't know size of rows in compressed granule, - /// so have to read them and throw out + /// so have to read them and throw out. if (current_offset) { - Block temp_block; - readRows(temp_block, current_offset); + Columns tmp_columns; + tmp_columns.resize(columns.size()); + readRows(tmp_columns, current_offset); } } @@ -103,7 +104,7 @@ size_t MergeTreeRangeReader::DelayedStream::finalize(Block & block) current_offset += num_delayed_rows; num_delayed_rows = 0; - return readRows(block, rows_to_read); + return readRows(columns, rows_to_read); } @@ -138,9 +139,9 @@ void MergeTreeRangeReader::Stream::checkEnoughSpaceInCurrentGranule(size_t num_r throw Exception("Cannot read from granule more than index_granularity.", ErrorCodes::LOGICAL_ERROR); } -size_t MergeTreeRangeReader::Stream::readRows(Block & block, size_t num_rows) +size_t MergeTreeRangeReader::Stream::readRows(Columns & columns, size_t num_rows) { - size_t rows_read = stream.read(block, current_mark, offset_after_current_mark, num_rows); + size_t rows_read = stream.read(columns, current_mark, offset_after_current_mark, num_rows); if (stream.isFinished()) finish(); @@ -163,7 +164,7 @@ void MergeTreeRangeReader::Stream::toNextMark() offset_after_current_mark = 0; } -size_t MergeTreeRangeReader::Stream::read(Block & block, size_t num_rows, bool skip_remaining_rows_in_current_granule) +size_t MergeTreeRangeReader::Stream::read(Columns & columns, size_t num_rows, bool skip_remaining_rows_in_current_granule) { checkEnoughSpaceInCurrentGranule(num_rows); @@ -171,7 +172,7 @@ size_t MergeTreeRangeReader::Stream::read(Block & block, size_t num_rows, bool s { checkNotFinished(); - size_t read_rows = readRows(block, num_rows); + size_t read_rows = readRows(columns, num_rows); offset_after_current_mark += num_rows; @@ -212,9 +213,9 @@ void MergeTreeRangeReader::Stream::skip(size_t num_rows) } } -size_t MergeTreeRangeReader::Stream::finalize(Block & block) +size_t MergeTreeRangeReader::Stream::finalize(Columns & columns) { - size_t read_rows = stream.finalize(block); + size_t read_rows = stream.finalize(columns); if (stream.isFinished()) finish(); @@ -223,10 +224,10 @@ size_t MergeTreeRangeReader::Stream::finalize(Block & block) } -void MergeTreeRangeReader::ReadResult::addGranule(size_t num_rows) +void MergeTreeRangeReader::ReadResult::addGranule(size_t num_rows_) { - rows_per_granule.push_back(num_rows); - total_rows_per_granule += num_rows; + rows_per_granule.push_back(num_rows_); + total_rows_per_granule += num_rows_; } void MergeTreeRangeReader::ReadResult::adjustLastGranule() @@ -353,13 +354,13 @@ size_t MergeTreeRangeReader::ReadResult::numZerosInTail(const UInt8 * begin, con zero16))) | (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 16)), - zero16))) << 16) + zero16))) << 16u) | (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 32)), - zero16))) << 32) + zero16))) << 32u) | (static_cast(_mm_movemask_epi8(_mm_cmpgt_epi8( _mm_loadu_si128(reinterpret_cast(pos + 48)), - zero16))) << 48); + zero16))) << 48u); if (val == 0) count += 64; else @@ -412,7 +413,7 @@ MergeTreeRangeReader::MergeTreeRangeReader( bool always_reorder_, bool remove_prewhere_column_, bool last_reader_in_chain_) : 
merge_tree_reader(merge_tree_reader_), index_granularity(&(merge_tree_reader->data_part->index_granularity)) , prev_reader(prev_reader_), prewhere_column_name(prewhere_column_name_) - , ordered_names(ordered_names_), alias_actions(alias_actions_), prewhere_actions(std::move(prewhere_actions_)) + , ordered_names(ordered_names_), alias_actions(std::move(alias_actions_)), prewhere_actions(std::move(prewhere_actions_)) , always_reorder(always_reorder_), remove_prewhere_column(remove_prewhere_column_) , last_reader_in_chain(last_reader_in_chain_), is_initialized(true) { @@ -476,92 +477,100 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar ReadResult read_result; size_t prev_bytes = 0; - bool should_reorder = false; if (prev_reader) { read_result = prev_reader->read(max_rows, ranges); - prev_bytes = read_result.block.bytes(); - Block block = continueReadingChain(read_result); + prev_bytes = read_result.numBytesRead(); + + size_t num_read_rows; + Columns columns = continueReadingChain(read_result, num_read_rows); + + /// Nothing to do. Return empty result. + if (read_result.num_rows == 0) + return read_result; + + bool has_columns = false; + for (auto & column : columns) + if (column) + has_columns = true; bool should_evaluate_missing_defaults = false; - if (block) + + if (has_columns) { - /// block.rows() <= read_result.block. We must filter block before adding columns to read_result.block + /// num_read_rows >= read_result.num_rows + /// We must filter block before adding columns to read_result.block /// Fill missing columns before filtering because some arrays from Nested may have empty data. - merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, block.rows()); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_read_rows); if (read_result.getFilter()) - filterBlock(block, read_result.getFilter()->getData()); + filterColumns(columns, read_result.getFilter()->getData()); } else { - size_t num_rows = read_result.block.rows(); - if (!read_result.block) - { - if (auto * filter = read_result.getFilter()) - num_rows = countBytesInFilter(filter->getData()); /// All columns were removed and filter is not always true. - else if (read_result.totalRowsPerGranule()) - num_rows = read_result.numReadRows(); /// All columns were removed and filter is always true. - /// else filter is always false. - } + size_t num_rows = read_result.num_rows; /// If block is empty, we still may need to add missing columns. /// In that case use number of rows in result block and don't filter block. 
if (num_rows) - merge_tree_reader->fillMissingColumns(block, should_reorder, should_evaluate_missing_defaults, num_rows); + merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); } - for (auto i : ext::range(0, block.columns())) - read_result.block.insert(std::move(block.getByPosition(i))); + read_result.columns.reserve(read_result.columns.size() + columns.size()); + for (auto & column : columns) + read_result.columns.emplace_back(std::move(column)); - if (read_result.block) + if (!read_result.columns.empty()) { if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.block); + merge_tree_reader->evaluateMissingDefaults(read_result.columns); } } else { read_result = startReadingChain(max_rows, ranges); - if (read_result.block) + read_result.num_rows = read_result.numReadRows(); + + if (read_result.num_rows) { bool should_evaluate_missing_defaults; - merge_tree_reader->fillMissingColumns(read_result.block, should_reorder, should_evaluate_missing_defaults, - read_result.block.rows()); + merge_tree_reader->fillMissingColumns(read_result.columns, should_evaluate_missing_defaults, + read_result.num_rows); if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.block); + merge_tree_reader->evaluateMissingDefaults(read_result.columns); } + else + read_result.columns.clear(); } - if (!read_result.block) + if (read_result.num_rows == 0) return read_result; - read_result.addNumBytesRead(read_result.block.bytes() - prev_bytes); + size_t total_bytes = 0; + for (auto & column : read_result.columns) + total_bytes += column->byteSize(); + + read_result.addNumBytesRead(total_bytes - prev_bytes); executePrewhereActionsAndFilterColumns(read_result); - if (last_reader_in_chain && (should_reorder || always_reorder)) - merge_tree_reader->reorderColumns(read_result.block, *ordered_names, prewhere_column_name); - return read_result; } -void MergeTreeRangeReader::filterBlock(Block & block, const IColumn::Filter & filter) const +void MergeTreeRangeReader::filterColumns(Columns & columns, const IColumn::Filter & filter) const { - for (const auto i : ext::range(0, block.columns())) + for (auto & column : columns) { - auto & col = block.getByPosition(i); - - if (col.column) + if (column) { - col.column = col.column->filter(filter, -1); + column = column->filter(filter, -1); - if (col.column->empty()) + if (column->empty()) { - block.clear(); + columns.clear(); return; } } @@ -571,6 +580,7 @@ void MergeTreeRangeReader::filterBlock(Block & block, const IColumn::Filter & fi MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t max_rows, MarkRanges & ranges) { ReadResult result; + result.columns.resize(merge_tree_reader->getColumns().size()); /// Stream is lazy. result.num_added_rows is the number of rows added to block which is not equal to /// result.num_rows_read until call to stream.finalize(). 
Also result.num_added_rows may be less than @@ -581,7 +591,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t { if (stream.isFinished()) { - result.addRows(stream.finalize(result.block)); + result.addRows(stream.finalize(result.columns)); stream = Stream(ranges.back().begin, ranges.back().end, merge_tree_reader); result.addRange(ranges.back()); ranges.pop_back(); @@ -589,13 +599,13 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t auto rows_to_read = std::min(space_left, stream.numPendingRowsInCurrentGranule()); bool last = rows_to_read == space_left; - result.addRows(stream.read(result.block, rows_to_read, !last)); + result.addRows(stream.read(result.columns, rows_to_read, !last)); result.addGranule(rows_to_read); space_left -= rows_to_read; } } - result.addRows(stream.finalize(result.block)); + result.addRows(stream.finalize(result.columns)); /// Last granule may be incomplete. result.adjustLastGranule(); @@ -603,22 +613,24 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t return result; } -Block MergeTreeRangeReader::continueReadingChain(ReadResult & result) +Columns MergeTreeRangeReader::continueReadingChain(ReadResult & result, size_t & num_rows) { - Block block; + Columns columns; + num_rows = 0; if (result.rowsPerGranule().empty()) { /// If zero rows were read on prev step, than there is no more rows to read. /// Last granule may have less rows than index_granularity, so finish reading manually. stream.finish(); - return block; + return columns; } + columns.resize(merge_tree_reader->getColumns().size()); + auto & rows_per_granule = result.rowsPerGranule(); auto & started_ranges = result.startedRanges(); - size_t added_rows = 0; size_t next_range_to_start = 0; auto size = rows_per_granule.size(); @@ -627,25 +639,25 @@ Block MergeTreeRangeReader::continueReadingChain(ReadResult & result) if (next_range_to_start < started_ranges.size() && i == started_ranges[next_range_to_start].num_granules_read_before_start) { - added_rows += stream.finalize(block); + num_rows += stream.finalize(columns); auto & range = started_ranges[next_range_to_start].range; ++next_range_to_start; stream = Stream(range.begin, range.end, merge_tree_reader); } bool last = i + 1 == size; - added_rows += stream.read(block, rows_per_granule[i], !last); + num_rows += stream.read(columns, rows_per_granule[i], !last); } stream.skip(result.numRowsToSkipInLastGranule()); - added_rows += stream.finalize(block); + num_rows += stream.finalize(columns); /// added_rows may be zero if all columns were read in prewhere and it's ok. 
- if (added_rows && added_rows != result.totalRowsPerGranule()) - throw Exception("RangeReader read " + toString(added_rows) + " rows, but " + if (num_rows && num_rows != result.totalRowsPerGranule()) + throw Exception("RangeReader read " + toString(num_rows) + " rows, but " + toString(result.totalRowsPerGranule()) + " expected.", ErrorCodes::LOGICAL_ERROR); - return block; + return columns; } void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & result) @@ -653,14 +665,38 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r if (!prewhere_actions) return; - if (alias_actions) - alias_actions->execute(result.block); + auto & header = merge_tree_reader->getColumns(); + size_t num_columns = header.size(); - prewhere_actions->execute(result.block); - auto & prewhere_column = result.block.getByName(*prewhere_column_name); - size_t prev_rows = result.block.rows(); - ColumnPtr filter = prewhere_column.column; - prewhere_column.column = nullptr; + if (result.columns.size() != num_columns) + throw Exception("Invalid number of columns passed to MergeTreeRangeReader. " + "Expected " + toString(num_columns) + ", " + "got " + toString(result.columns.size()), ErrorCodes::LOGICAL_ERROR); + + ColumnPtr filter; + size_t prewhere_column_pos; + + { + /// Restore block from columns list. + Block block; + auto name_and_type = header.begin(); + for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) + block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); + + if (alias_actions) + alias_actions->execute(block); + + prewhere_actions->execute(block); + + prewhere_column_pos = block.getPositionByName(*prewhere_column_name); + + result.columns.clear(); + result.columns.resize(block.columns()); + for (auto & col : block) + result.columns.emplace_back(std::move(col.column)); + + filter.swap(result.columns[prewhere_column_pos]); + } if (result.getFilter()) { @@ -677,46 +713,57 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r bool filter_always_true = !result.getFilter() && result.totalRowsPerGranule() == filter->size(); if (result.totalRowsPerGranule() == 0) - result.block.clear(); + { + result.columns.clear(); + result.num_rows = 0; + } else if (!filter_always_true) { FilterDescription filter_description(*filter); + size_t num_bytes_in_filter = 0; + bool calculated_num_bytes_in_filter = false; + + auto getNumBytesInFilter = [&]() + { + if (!calculated_num_bytes_in_filter) + num_bytes_in_filter = countBytesInFilter(*filter_description.data); + + calculated_num_bytes_in_filter = true; + return num_bytes_in_filter; + }; + if (last_reader_in_chain) { - size_t num_bytes_in_filter = countBytesInFilter(*filter_description.data); - if (num_bytes_in_filter == 0) - result.block.clear(); - else if (num_bytes_in_filter == filter->size()) + size_t bytes_in_filter = getNumBytesInFilter(); + if (bytes_in_filter == 0) + { + result.columns.clear(); + result.num_rows = 0; + } + else if (bytes_in_filter == filter->size()) filter_always_true = true; } if (!filter_always_true) - filterBlock(result.block, *filter_description.data); + { + filterColumns(result.columns, *filter_description.data); + + if (result.columns.empty()) + result.num_rows = getNumBytesInFilter(); + else + result.num_rows = result.columns[0]->size(); + } } - if (!result.block) + if (result.num_rows == 0) return; - auto getNumRows = [&]() - { - /// If block has single column, it's filter. 
We need to count bytes in it in order to get the number of rows. - if (result.block.columns() > 1) - return result.block.rows(); - else if (result.getFilter()) - return countBytesInFilter(result.getFilter()->getData()); - else - return prev_rows; - }; - if (remove_prewhere_column) - result.block.erase(*prewhere_column_name); + result.columns.erase(result.columns.begin() + prewhere_column_pos); else - prewhere_column.column = prewhere_column.type->createColumnConst(getNumRows(), 1u); - - /// If block is empty, create column in order to store rows number. - if (last_reader_in_chain && result.block.columns() == 0) - result.block.insert({ColumnNothing::create(getNumRows()), std::make_shared(), "_nothing"}); + result.columns[prewhere_column_pos] = + DataTypeUInt8().createColumnConst(result.num_rows, 1u)->convertToFullColumnIfConst(); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index 4261509d7fc..d3f1333289b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -47,10 +47,10 @@ public: /// Returns the number of rows added to block. /// NOTE: have to return number of rows because block has broken invariant: /// some columns may have different size (for example, default columns may be zero size). - size_t read(Block & block, size_t from_mark, size_t offset, size_t num_rows); + size_t read(Columns & columns, size_t from_mark, size_t offset, size_t num_rows); /// Skip extra rows to current_offset and perform actual reading - size_t finalize(Block & block); + size_t finalize(Columns & columns); bool isFinished() const { return is_finished; } @@ -69,7 +69,7 @@ public: /// Current position from the begging of file in rows size_t position() const; - size_t readRows(Block & block, size_t num_rows); + size_t readRows(Columns & columns, size_t num_rows); }; /// Very thin wrapper for DelayedStream @@ -81,8 +81,8 @@ public: Stream(size_t from_mark, size_t to_mark, MergeTreeReader * merge_tree_reader); /// Returns the number of rows added to block. - size_t read(Block & block, size_t num_rows, bool skip_remaining_rows_in_current_granule); - size_t finalize(Block & block); + size_t read(Columns & columns, size_t num_rows, bool skip_remaining_rows_in_current_granule); + size_t finalize(Columns & columns); void skip(size_t num_rows); void finish() { current_mark = last_mark; } @@ -112,7 +112,7 @@ public: void checkNotFinished() const; void checkEnoughSpaceInCurrentGranule(size_t num_rows) const; - size_t readRows(Block & block, size_t num_rows); + size_t readRows(Columns & columns, size_t num_rows); void toNextMark(); }; @@ -143,7 +143,7 @@ public: /// Filter you need to apply to newly-read columns in order to add them to block. 
const ColumnUInt8 * getFilter() const { return filter; } - void addGranule(size_t num_rows); + void addGranule(size_t num_rows_); void adjustLastGranule(); void addRows(size_t rows) { num_read_rows += rows; } void addRange(const MarkRange & range) { started_ranges.push_back({rows_per_granule.size(), range}); } @@ -158,6 +158,7 @@ public: void addNumBytesRead(size_t count) { num_bytes_read += count; } Columns columns; + size_t num_rows = 0; private: RangesInfo started_ranges; @@ -187,9 +188,9 @@ public: private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); - Block continueReadingChain(ReadResult & result); + Columns continueReadingChain(ReadResult & result, size_t & num_rows); void executePrewhereActionsAndFilterColumns(ReadResult & result); - void filterBlock(Block & block, const IColumn::Filter & filter) const; + void filterColumns(Columns & columns, const IColumn::Filter & filter) const; MergeTreeReader * merge_tree_reader = nullptr; const MergeTreeIndexGranularity * index_granularity = nullptr; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index d9732c8ac6f..cab963d0f66 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -6,7 +6,6 @@ #include #include #include -#include namespace DB @@ -31,16 +30,30 @@ namespace ErrorCodes MergeTreeReader::~MergeTreeReader() = default; -MergeTreeReader::MergeTreeReader(const String & path_, - const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_, - UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, bool save_marks_in_cache_, - const MergeTreeData & storage_, const MarkRanges & all_mark_ranges_, - size_t aio_threshold_, size_t max_read_buffer_size_, const ValueSizeMap & avg_value_size_hints_, +MergeTreeReader::MergeTreeReader( + String path_, + MergeTreeData::DataPartPtr data_part_, + NamesAndTypesList columns_, + UncompressedCache * uncompressed_cache_, + MarkCache * mark_cache_, + bool save_marks_in_cache_, + const MergeTreeData & storage_, + MarkRanges all_mark_ranges_, + size_t aio_threshold_, + size_t max_read_buffer_size_, + ValueSizeMap avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) - : data_part(data_part_), avg_value_size_hints(avg_value_size_hints_), path(path_), columns(columns_) - , uncompressed_cache(uncompressed_cache_), mark_cache(mark_cache_), save_marks_in_cache(save_marks_in_cache_), storage(storage_) - , all_mark_ranges(all_mark_ranges_), aio_threshold(aio_threshold_), max_read_buffer_size(max_read_buffer_size_) + : data_part(std::move(data_part_)) + , avg_value_size_hints(std::move(avg_value_size_hints_)) + , path(std::move(path_)), columns(std::move(columns_)) + , uncompressed_cache(uncompressed_cache_) + , mark_cache(mark_cache_) + , save_marks_in_cache(save_marks_in_cache_) + , storage(storage_) + , all_mark_ranges(std::move(all_mark_ranges_)) + , aio_threshold(aio_threshold_) + , max_read_buffer_size(max_read_buffer_size_) { try { @@ -61,34 +74,44 @@ const MergeTreeReader::ValueSizeMap & MergeTreeReader::getAvgValueSizeHints() co } -size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res) +size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns) { size_t read_rows = 0; try { + size_t num_columns = columns.size(); + + if (res_columns.size() != num_columns) 
+ throw Exception("invalid number of columns passed to MergeTreeReader::readRows. " + "Expected " + toString(num_columns) + ", " + "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR); + /// Pointers to offset columns that are common to the nested data structure columns. /// If append is true, then the value will be equal to nullptr and will be used only to /// check that the offsets column has been already read. OffsetColumns offset_columns; - for (const NameAndTypePair & it : columns) + auto name_and_type = columns.begin(); + for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) { + auto & [name, type] = *name_and_type; + /// The column is already present in the block so we will append the values to the end. - bool append = res.has(it.name); + bool append = res_columns[pos] != nullptr; if (!append) - res.insert(ColumnWithTypeAndName(it.type->createColumn(), it.type, it.name)); + res_columns[pos] = name_and_type->type->createColumn(); /// To keep offsets shared. TODO Very dangerous. Get rid of this. - MutableColumnPtr column = res.getByName(it.name).column->assumeMutable(); + MutableColumnPtr column = res_columns[pos]->assumeMutable(); bool read_offsets = true; /// For nested data structures collect pointers to offset columns. - if (const DataTypeArray * type_arr = typeid_cast(it.type.get())) + if (const auto * type_arr = typeid_cast(type.get())) { - String name = Nested::extractTableName(it.name); + String table_name = Nested::extractTableName(name); - auto it_inserted = offset_columns.emplace(name, nullptr); + auto it_inserted = offset_columns.emplace(table_name, nullptr); /// offsets have already been read on the previous iteration and we don't need to read it again if (!it_inserted.second) @@ -108,27 +131,28 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t { size_t column_size_before_reading = column->size(); - readData(it.name, *it.type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets); + readData(name, *type, *column, from_mark, continue_reading, max_rows_to_read, read_offsets); /// For elements of Nested, column_size_before_reading may be greater than column size /// if offsets are not empty and were already read, but elements are empty. - if (column->size()) + if (!column->empty()) read_rows = std::max(read_rows, column->size() - column_size_before_reading); } catch (Exception & e) { /// Better diagnostics. - e.addMessage("(while reading column " + it.name + ")"); + e.addMessage("(while reading column " + name + ")"); throw; } - if (column->size()) - res.getByName(it.name).column = std::move(column); + if (column->empty()) + res_columns[pos] = nullptr; else - res.erase(it.name); + res_columns[pos] = std::move(column); } - /// NOTE: positions for all streams must be kept in sync. In particular, even if for some streams there are no rows to be read, + /// NOTE: positions for all streams must be kept in sync. + /// In particular, even if for some streams there are no rows to be read, /// you must ensure that no seeks are skipped and at this point they all point to to_mark. } catch (Exception & e) @@ -137,7 +161,9 @@ size_t MergeTreeReader::readRows(size_t from_mark, bool continue_reading, size_t storage.reportBrokenPart(data_part->name); /// Better diagnostics. 
-        e.addMessage("(while reading from part " + path + " from mark " + toString(from_mark) + " with max_rows_to_read = " + toString(max_rows_to_read) + ")");
+        e.addMessage("(while reading from part " + path + " "
+                     "from mark " + toString(from_mark) + " "
+                     "with max_rows_to_read = " + toString(max_rows_to_read) + ")");
         throw;
     }
     catch (...)
@@ -235,7 +261,7 @@ void MergeTreeReader::readData(
 
 static bool arrayHasNoElementsRead(const IColumn & column)
 {
-    const ColumnArray * column_array = typeid_cast<const ColumnArray *>(&column);
+    const auto * column_array = typeid_cast<const ColumnArray *>(&column);
     if (!column_array)
         return false;
 
@@ -253,22 +279,31 @@ static bool arrayHasNoElementsRead(const IColumn & column)
 }
 
 
-void MergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, bool & should_evaluate_missing_defaults, size_t num_rows)
+void MergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows)
 {
     try
     {
+        size_t num_columns = columns.size();
+
+        if (res_columns.size() != num_columns)
+            throw Exception("invalid number of columns passed to MergeTreeReader::fillMissingColumns. "
+                            "Expected " + toString(num_columns) + ", "
+                            "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR);
+
         /// For a missing column of a nested data structure we must create not a column of empty
         /// arrays, but a column of arrays of correct length.
 
         /// First, collect offset columns for all arrays in the block.
         OffsetColumns offset_columns;
-        for (size_t i = 0; i < res.columns(); ++i)
+        auto requested_column = columns.begin();
+        for (size_t i = 0; i < num_columns; ++i, ++requested_column)
         {
-            const ColumnWithTypeAndName & column = res.safeGetByPosition(i);
+            if (res_columns[i] == nullptr)
+                continue;
 
-            if (const ColumnArray * array = typeid_cast<const ColumnArray *>(column.column.get()))
+            if (const auto * array = typeid_cast<const ColumnArray *>(res_columns[i].get()))
             {
-                String offsets_name = Nested::extractTableName(column.name);
+                String offsets_name = Nested::extractTableName(requested_column->name);
                 auto & offsets_column = offset_columns[offsets_name];
 
                 /// If for some reason multiple offsets columns are present for the same nested data structure,
@@ -279,54 +314,43 @@ void MergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, boo
         }
 
         should_evaluate_missing_defaults = false;
-        should_reorder = false;
 
        /// insert default values only for columns without default expressions
-        for (const auto & requested_column : columns)
+        requested_column = columns.begin();
+        for (size_t i = 0; i < num_columns; ++i, ++requested_column)
         {
-            bool has_column = res.has(requested_column.name);
-            if (has_column)
-            {
-                const auto & col = *res.getByName(requested_column.name).column;
-                if (arrayHasNoElementsRead(col))
-                {
-                    res.erase(requested_column.name);
-                    has_column = false;
-                }
-            }
+            auto & [name, type] = *requested_column;
 
-            if (!has_column)
+            if (res_columns[i] && arrayHasNoElementsRead(*res_columns[i]))
+                res_columns[i] = nullptr;
+
+            if (res_columns[i] == nullptr)
             {
-                should_reorder = true;
-                if (storage.getColumns().hasDefault(requested_column.name))
+                if (storage.getColumns().hasDefault(name))
                 {
                     should_evaluate_missing_defaults = true;
                     continue;
                 }
 
-                ColumnWithTypeAndName column_to_add;
-                column_to_add.name = requested_column.name;
-                column_to_add.type = requested_column.type;
-
-                String offsets_name = Nested::extractTableName(column_to_add.name);
-                if (offset_columns.count(offsets_name))
+                String offsets_name = Nested::extractTableName(name);
+                auto offset_it = offset_columns.find(offsets_name);
+                if (offset_it !=
offset_columns.end())
                 {
-                    ColumnPtr offsets_column = offset_columns[offsets_name];
-                    DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*column_to_add.type).getNestedType();
+                    ColumnPtr offsets_column = offset_it->second;
+                    DataTypePtr nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType();
                     size_t nested_rows = typeid_cast<const ColumnUInt64 &>(*offsets_column).getData().back();
 
-                    ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
+                    ColumnPtr nested_column =
+                        nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
 
-                    column_to_add.column = ColumnArray::create(nested_column, offsets_column);
+                    res_columns[i] = ColumnArray::create(nested_column, offsets_column);
                 }
                 else
                 {
-                    /// We must turn a constant column into a full column because the interpreter could infer that it is constant everywhere
-                    /// but in some blocks (from other parts) it can be a full column.
-                    column_to_add.column = column_to_add.type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
+                    /// We must turn a constant column into a full column because the interpreter could infer
+                    /// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
+                    res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
                 }
-
-                res.insert(std::move(column_to_add));
             }
         }
     }
@@ -338,34 +362,35 @@ void MergeTreeReader::fillMissingColumns(Block & res, bool & should_reorder, boo
     }
 }
 
-void MergeTreeReader::reorderColumns(Block & res, const Names & ordered_names, const String * filter_name)
+void MergeTreeReader::evaluateMissingDefaults(Columns & res_columns)
 {
     try
     {
-        Block ordered_block;
+        size_t num_columns = columns.size();
 
-        for (const auto & name : ordered_names)
-            if (res.has(name))
-                ordered_block.insert(res.getByName(name));
+        if (res_columns.size() != num_columns)
+            throw Exception("invalid number of columns passed to MergeTreeReader::evaluateMissingDefaults. "
+                            "Expected " + toString(num_columns) + ", "
+                            "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR);
 
-        if (filter_name && !ordered_block.has(*filter_name) && res.has(*filter_name))
-            ordered_block.insert(res.getByName(*filter_name));
+        /// Convert columns list to block.
+        /// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions.
+        Block block;
+        auto name_and_type = columns.begin();
+        for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
+        {
+            if (res_columns[pos] == nullptr)
+                continue;
 
-        std::swap(res, ordered_block);
-    }
-    catch (Exception & e)
-    {
-        /// Better diagnostics.
-        e.addMessage("(while reading from part " + path + ")");
-        throw;
-    }
-}
+            block.insert({res_columns[pos], name_and_type->type, name_and_type->name});
+        }
 
-void MergeTreeReader::evaluateMissingDefaults(Block & res)
-{
-    try
-    {
-        DB::evaluateMissingDefaults(res, columns, storage.getColumns().getDefaults(), storage.global_context);
+        DB::evaluateMissingDefaults(block, columns, storage.getColumns().getDefaults(), storage.global_context);
+
+        /// Move columns from block.
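+        /// (The defaults were evaluated on the temporary block; each resulting
+        /// column is moved back into res_columns by name below.)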
+        name_and_type = columns.begin();
+        for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type)
+            res_columns[pos] = std::move(block.getByName(name_and_type->name).column);
     }
     catch (Exception & e)
     {
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h
index 25f4c9ddd32..367f1bbb530 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeReader.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h
@@ -19,14 +19,17 @@ public:
     using ValueSizeMap = std::map<std::string, double>;
     using DeserializeBinaryBulkStateMap = std::map<std::string, IDataType::DeserializeBinaryBulkStatePtr>;
 
-    MergeTreeReader(const String & path_, /// Path to the directory containing the part
-        const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_,
+    MergeTreeReader(String path_, /// Path to the directory containing the part
+        MergeTreeData::DataPartPtr data_part_,
+        NamesAndTypesList columns_,
         UncompressedCache * uncompressed_cache_,
         MarkCache * mark_cache_,
         bool save_marks_in_cache_,
-        const MergeTreeData & storage_, const MarkRanges & all_mark_ranges_,
-        size_t aio_threshold_, size_t max_read_buffer_size_,
-        const ValueSizeMap & avg_value_size_hints_ = ValueSizeMap{},
+        const MergeTreeData & storage_,
+        MarkRanges all_mark_ranges_,
+        size_t aio_threshold_,
+        size_t max_read_buffer_size_,
+        ValueSizeMap avg_value_size_hints_ = ValueSizeMap{},
         const ReadBufferFromFileBase::ProfileCallback & profile_callback_ = ReadBufferFromFileBase::ProfileCallback{},
         clockid_t clock_type_ = CLOCK_MONOTONIC_COARSE);
 
@@ -36,20 +39,17 @@ public:
 
     /// Add columns from ordered_names that are not present in the block.
     /// Missing columns are added in the order specified by ordered_names.
-    /// If at least one column was added, reorders all columns in the block according to ordered_names.
-    /// num_rows is needed in case block is empty.
-    void fillMissingColumns(Block & res, bool & should_reorder, bool & should_evaluate_missing_defaults, size_t num_rows);
-    /// Sort columns to ensure consistent order among all blocks.
-    /// If filter_name is not nullptr and block has filter column, move it to the end of block.
-    void reorderColumns(Block & res, const Names & ordered_names, const String * filter_name);
+    /// num_rows is needed in case all res_columns are nullptr.
+    void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows);
 
     /// Evaluate defaulted columns if necessary.
-    void evaluateMissingDefaults(Block & res);
+    void evaluateMissingDefaults(Columns & res_columns);
 
     const NamesAndTypesList & getColumns() const { return columns; }
 
     /// Returns the number of rows read, or zero if there are no columns to read.
-    /// If continue_reading is true, continue reading from last state, otherwise seek to from_mark
-    size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Block & res);
+    /// If continue_reading is true, continue reading from the last state, otherwise seek to from_mark.
+    /// Fills res_columns in the order specified by the getColumns() list. If a column was not read, it will be nullptr.
+    size_t readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns);
 
     MergeTreeData::DataPartPtr data_part;
 
From 01579296f13d4556b650368bd57cd4f6a9fbf202 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 25 Sep 2019 19:00:42 +0300
Subject: [PATCH 010/222] Link fixes.
--- docs/en/operations/table_engines/hdfs.md | 2 +- docs/ru/development/build_cross.md | 1 + docs/ru/operations/table_engines/hdfs.md | 2 +- docs/ru/query_language/agg_functions/parametric_functions.md | 2 +- docs/ru/query_language/functions/other_functions.md | 2 +- docs/ru/query_language/select.md | 2 +- docs/toc_en.yml | 1 + docs/toc_ru.yml | 1 + 8 files changed, 8 insertions(+), 5 deletions(-) create mode 120000 docs/ru/development/build_cross.md diff --git a/docs/en/operations/table_engines/hdfs.md b/docs/en/operations/table_engines/hdfs.md index 1f6ecc50a79..9e2947341bf 100644 --- a/docs/en/operations/table_engines/hdfs.md +++ b/docs/en/operations/table_engines/hdfs.md @@ -58,7 +58,7 @@ Multiple path components can have globs. For being processed file should exists - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Substitutes any number in range from N to M including both borders. - Constructions with `{}` are similar to the [remote table function](../../query_language/table_functions/remote.md)). + Constructions with `{}` are similar to the [remote](../../query_language/table_functions/remote.md) table function. **Example** diff --git a/docs/ru/development/build_cross.md b/docs/ru/development/build_cross.md new file mode 120000 index 00000000000..f595f252de3 --- /dev/null +++ b/docs/ru/development/build_cross.md @@ -0,0 +1 @@ +../../en/development/build_cross.md \ No newline at end of file diff --git a/docs/ru/operations/table_engines/hdfs.md b/docs/ru/operations/table_engines/hdfs.md index 303f0a07d19..b384eb3bf60 100644 --- a/docs/ru/operations/table_engines/hdfs.md +++ b/docs/ru/operations/table_engines/hdfs.md @@ -55,7 +55,7 @@ SELECT * FROM hdfs_engine_table LIMIT 2 - `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. - `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно. -Конструкция с `{}` аналогична табличной функции [remote](remote.md). +Конструкция с `{}` аналогична табличной функции [remote](../../query_language/table_functions/remote.md). **Пример** diff --git a/docs/ru/query_language/agg_functions/parametric_functions.md b/docs/ru/query_language/agg_functions/parametric_functions.md index 5adf20dfce5..b0ece3ced11 100644 --- a/docs/ru/query_language/agg_functions/parametric_functions.md +++ b/docs/ru/query_language/agg_functions/parametric_functions.md @@ -45,7 +45,7 @@ FROM ( └─────────────────────────────────────────────────────────────────────────┘ ``` -С помощью функции [bar](../other_functions.md#function-bar) можно визуализировать гистограмму, например: +С помощью функции [bar](../functions/other_functions.md#function-bar) можно визуализировать гистограмму, например: ```sql WITH histogram(5)(rand() % 100) AS hist diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index 987840cac99..3cc56bb1217 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -117,7 +117,7 @@ SELECT visibleWidth(NULL) Функция кидает исключение, если таблица не существует. Для элементов вложенной структуры данных функция проверяет существование столбца. Для самой же вложенной структуры данных функция возвращает 0. -## bar +## bar {#function-bar} Позволяет построить unicode-art диаграмму. 
diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index d206ba42c0b..61854066f32 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -92,7 +92,7 @@ FROM └───────────┴───────────┘ ``` -### Секция FROM +### Секция FROM {#select-from} Если секция FROM отсутствует, то данные будут читаться из таблицы `system.one`. Таблица `system.one` содержит ровно одну строку (то есть, эта таблица выполняет такую же роль, как таблица DUAL, которую можно найти в других СУБД). diff --git a/docs/toc_en.yml b/docs/toc_en.yml index dccd51f3cb1..b3a46303e49 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -210,6 +210,7 @@ nav: - 'Overview of ClickHouse Architecture': 'development/architecture.md' - 'How to Build ClickHouse on Linux': 'development/build.md' - 'How to Build ClickHouse on Mac OS X': 'development/build_osx.md' + - 'How to Build ClickHouse on Linux for Mac OS X': 'development/build_cross.md' - 'How to Write C++ code': 'development/style.md' - 'How to Run ClickHouse Tests': 'development/tests.md' - 'Third-Party Libraries Used': 'development/contrib.md' diff --git a/docs/toc_ru.yml b/docs/toc_ru.yml index b21bcc838dc..98c7b27a746 100644 --- a/docs/toc_ru.yml +++ b/docs/toc_ru.yml @@ -211,6 +211,7 @@ nav: - 'Обзор архитектуры ClickHouse': 'development/architecture.md' - 'Как собрать ClickHouse на Linux': 'development/build.md' - 'Как собрать ClickHouse на Mac OS X': 'development/build_osx.md' + - 'Как собрать ClickHouse на Linux для Mac OS X': 'development/build_cross.md' - 'Как писать код на C++': 'development/style.md' - 'Как запустить тесты': 'development/tests.md' - 'Сторонние библиотеки': 'development/contrib.md' From b65fe57319073da8cd6769324aef9a1d6908686e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 26 Sep 2019 20:29:41 +0300 Subject: [PATCH 011/222] Update MergeTreeRangeReader. --- .../MergeTreeBaseSelectBlockInputStream.cpp | 10 +++++- .../MergeTree/MergeTreeBlockReadUtils.cpp | 36 +++++++++++-------- .../MergeTree/MergeTreeBlockReadUtils.h | 4 +-- .../MergeTree/MergeTreeRangeReader.cpp | 22 +++++++++--- .../Storages/MergeTree/MergeTreeRangeReader.h | 9 ++--- 5 files changed, 55 insertions(+), 26 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 731624d1997..1f899c6b592 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -161,6 +161,14 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() if (read_result.num_rows == 0) read_result.columns.clear(); + auto & sample_block = getPort().getHeader(); + if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) + throw Exception("Inconsistent number of columns got from MergeTreeRangeReader. " + "Have " + toString(sample_block.columns()) + " in sample block " + "and " + toString(read_result.columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR); + + /// TODO: check columns have the same types as in header. 
+
     UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows;
 
     /// TODO
@@ -171,7 +179,7 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl()
         task->size_predictor->updateFilteredRowsRation(read_result.numReadRows(), num_filtered_rows);
 
         if (!read_result.columns.empty())
-            task->size_predictor->update(read_result.columns);
+            task->size_predictor->update(sample_block, read_result.columns, read_result.num_rows);
     }
 
     return Chunk(std::move(read_result.columns), read_result.num_rows);
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
index 7dc9a40e89a..920697f3c32 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
@@ -84,22 +84,25 @@ MergeTreeBlockSizePredictor::MergeTreeBlockSizePredictor(
 {
     number_of_rows_in_part = data_part->rows_count;
     /// Initialize with the sample block until update() is called.
-    initialize(sample_block, columns);
+    initialize(sample_block, {}, columns);
 }
 
 
-void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const Names & columns, bool from_update)
+void MergeTreeBlockSizePredictor::initialize(const Block & sample_block, const Columns & columns, const Names & names, bool from_update)
 {
     fixed_columns_bytes_per_row = 0;
     dynamic_columns_infos.clear();
 
     std::unordered_set<String> names_set;
     if (!from_update)
-        names_set.insert(columns.begin(), columns.end());
+        names_set.insert(names.begin(), names.end());
 
-    for (const auto & column_with_type_and_name : sample_block)
+    size_t num_columns = sample_block.columns();
+    for (size_t pos = 0; pos < num_columns; ++pos)
     {
+        const auto & column_with_type_and_name = sample_block.getByPosition(pos);
         const String & column_name = column_with_type_and_name.name;
-        const ColumnPtr & column_data = column_with_type_and_name.column;
+        const ColumnPtr & column_data = from_update ? columns[pos]
+                                                    : column_with_type_and_name.column;
 
         if (!from_update && !names_set.count(column_name))
             continue;
@@ -151,25 +154,30 @@ void MergeTreeBlockSizePredictor::startBlock()
 
 
 /// TODO: add last_read_row_in_part parameter to take into account gaps between adjacent ranges
-void MergeTreeBlockSizePredictor::update(const Block & block, double decay)
+void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay)
 {
+    if (columns.size() != sample_block.columns())
+        throw Exception("Inconsistent number of columns passed to MergeTreeBlockSizePredictor. "
+                        "Have " + toString(sample_block.columns()) + " in sample block "
+                        "and " + toString(columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR);
+
     if (!is_initialized_in_update)
     {
         /// Reinitialize with read block to update estimation for DEFAULT and MATERIALIZED columns without data.
- initialize(block, {}, true); + initialize(sample_block, columns, {}, true); is_initialized_in_update = true; } - size_t new_rows = block.rows(); - if (new_rows < block_size_rows) + + if (num_rows < block_size_rows) { - throw Exception("Updated block has less rows (" + toString(new_rows) + ") than previous one (" + toString(block_size_rows) + ")", + throw Exception("Updated block has less rows (" + toString(num_rows) + ") than previous one (" + toString(block_size_rows) + ")", ErrorCodes::LOGICAL_ERROR); } - size_t diff_rows = new_rows - block_size_rows; - block_size_bytes = new_rows * fixed_columns_bytes_per_row; + size_t diff_rows = num_rows - block_size_rows; + block_size_bytes = num_rows * fixed_columns_bytes_per_row; bytes_per_row_current = fixed_columns_bytes_per_row; - block_size_rows = new_rows; + block_size_rows = num_rows; /// Make recursive updates for each read row: v_{i+1} = (1 - decay) v_{i} + decay v_{target} /// Use sum of geometric sequence formula to update multiple rows: v{n} = (1 - decay)^n v_{0} + (1 - (1 - decay)^n) v_{target} @@ -179,7 +187,7 @@ void MergeTreeBlockSizePredictor::update(const Block & block, double decay) max_size_per_row_dynamic = 0; for (auto & info : dynamic_columns_infos) { - size_t new_size = block.getByName(info.name).column->byteSize(); + size_t new_size = columns[sample_block.getPositionByName(info.name)]->byteSize(); size_t diff_size = new_size - info.size_bytes; double local_bytes_per_row = static_cast(diff_size) / diff_rows; diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index a031255b3ab..19c6adbd9c7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -85,7 +85,7 @@ struct MergeTreeBlockSizePredictor void startBlock(); /// Updates statistic for more accurate prediction - void update(const Block & block, double decay = DECAY()); + void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = DECAY()); /// Return current block size (after update()) inline size_t getBlockSize() const @@ -148,7 +148,7 @@ protected: bool is_initialized_in_update = false; - void initialize(const Block & sample_block, const Names & columns, bool from_update = false); + void initialize(const Block & sample_block, const Columns & columns, const Names & names, bool from_update = false); public: diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 99d83789f45..8cac9fcfad8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -1,7 +1,6 @@ #include #include #include -#include #include #include @@ -409,14 +408,27 @@ void MergeTreeRangeReader::ReadResult::setFilter(const ColumnPtr & new_filter) MergeTreeRangeReader::MergeTreeRangeReader( MergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, ExpressionActionsPtr alias_actions_, ExpressionActionsPtr prewhere_actions_, - const String * prewhere_column_name_, const Names * ordered_names_, - bool always_reorder_, bool remove_prewhere_column_, bool last_reader_in_chain_) + const String * prewhere_column_name_, bool remove_prewhere_column_, bool last_reader_in_chain_) : merge_tree_reader(merge_tree_reader_), index_granularity(&(merge_tree_reader->data_part->index_granularity)) , prev_reader(prev_reader_), prewhere_column_name(prewhere_column_name_) - , 
ordered_names(ordered_names_), alias_actions(std::move(alias_actions_)), prewhere_actions(std::move(prewhere_actions_)) - , always_reorder(always_reorder_), remove_prewhere_column(remove_prewhere_column_) + , alias_actions(std::move(alias_actions_)), prewhere_actions(std::move(prewhere_actions_)) + , remove_prewhere_column(remove_prewhere_column_) , last_reader_in_chain(last_reader_in_chain_), is_initialized(true) { + if (prev_reader) + sample_block = prev_reader->getSampleBlock(); + + for (auto & name_and_type : merge_tree_reader->getColumns()) + sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name}); + + if (alias_actions) + alias_actions->execute(sample_block, true); + + if (prewhere_actions) + prewhere_actions->execute(sample_block, true); + + if (remove_prewhere_column) + sample_block.erase(*prewhere_column_name); } bool MergeTreeRangeReader::isReadingFinished() const diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h index d3f1333289b..67d5cbc3908 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.h @@ -22,8 +22,7 @@ class MergeTreeRangeReader public: MergeTreeRangeReader(MergeTreeReader * merge_tree_reader_, MergeTreeRangeReader * prev_reader_, ExpressionActionsPtr alias_actions_, ExpressionActionsPtr prewhere_actions_, - const String * prewhere_column_name_, const Names * ordered_names_, - bool always_reorder_, bool remove_prewhere_column_, bool last_reader_in_chain_); + const String * prewhere_column_name_, bool remove_prewhere_column_, bool last_reader_in_chain_); MergeTreeRangeReader() = default; @@ -185,6 +184,8 @@ public: ReadResult read(size_t max_rows, MarkRanges & ranges); + const Block & getSampleBlock() const { return sample_block; } + private: ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges); @@ -197,13 +198,13 @@ private: MergeTreeRangeReader * prev_reader = nullptr; /// If not nullptr, read from prev_reader firstly. const String * prewhere_column_name = nullptr; - const Names * ordered_names = nullptr; ExpressionActionsPtr alias_actions = nullptr; /// If not nullptr, calculate aliases. ExpressionActionsPtr prewhere_actions = nullptr; /// If not nullptr, calculate filter. Stream stream; - bool always_reorder = true; + Block sample_block; + bool remove_prewhere_column = false; bool last_reader_in_chain = false; bool is_initialized = false; From 1689576770a0909043b4cb72b964ded31425d557 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 1 Oct 2019 19:50:08 +0300 Subject: [PATCH 012/222] Update MergeTreeDataSelectExecutor. 
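
Make MergeTreeDataSelectExecutor return Pipes instead of BlockInputStreams:
a pipe is a vector of processors chained by connecting the output port of
one processor to the input port of the next. A minimal sketch of the wiring
pattern used throughout this patch (the helper name appendToPipe is
illustrative only; it is not part of the code):

    /// Append a single-input transform to a non-empty pipe: take the free
    /// output port of the current tail, connect it to the transform's
    /// input, and make the transform the new tail of the pipe.
    void appendToPipe(Processors & pipe, ProcessorPtr transform)
    {
        auto & output = pipe.back()->getOutputs().front();
        connect(output, transform->getInputs().front());
        pipe.emplace_back(std::move(transform));
    }

This is the shape in which sampling (FilterTransform), the _sample_factor
column (AddingConstColumnTransform) and PREWHERE column removal
(ExpressionTransform) are attached to each per-part source below.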
--- .../Transforms/AddingConstColumnTransform.h | 40 +++++ .../Transforms/ReverseTransform.cpp | 22 +++ .../Processors/Transforms/ReverseTransform.h | 17 ++ ...m.cpp => MergeTreeBaseSelectProcessor.cpp} | 54 +++--- ...tream.h => MergeTreeBaseSelectProcessor.h} | 6 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 163 +++++++++++++----- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 +- .../Storages/MergeTree/MergeTreeReadPool.cpp | 2 +- ...pp => MergeTreeReverseSelectProcessor.cpp} | 90 +++++----- ...am.h => MergeTreeReverseSelectProcessor.h} | 16 +- ...tream.cpp => MergeTreeSelectProcessor.cpp} | 70 ++++---- ...putStream.h => MergeTreeSelectProcessor.h} | 13 +- .../MergeTreeSequentialBlockInputStream.cpp | 27 ++- ...geTreeThreadSelectBlockInputProcessor.cpp} | 18 +- ...ergeTreeThreadSelectBlockInputProcessor.h} | 8 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 13 +- .../Storages/StorageReplicatedMergeTree.cpp | 2 +- .../src/Storages/StorageReplicatedMergeTree.h | 2 +- 18 files changed, 373 insertions(+), 192 deletions(-) create mode 100644 dbms/src/Processors/Transforms/AddingConstColumnTransform.h create mode 100644 dbms/src/Processors/Transforms/ReverseTransform.cpp create mode 100644 dbms/src/Processors/Transforms/ReverseTransform.h rename dbms/src/Storages/MergeTree/{MergeTreeBaseSelectBlockInputStream.cpp => MergeTreeBaseSelectProcessor.cpp} (84%) rename dbms/src/Storages/MergeTree/{MergeTreeBaseSelectBlockInputStream.h => MergeTreeBaseSelectProcessor.h} (93%) rename dbms/src/Storages/MergeTree/{MergeTreeReverseSelectBlockInputStream.cpp => MergeTreeReverseSelectProcessor.cpp} (80%) rename dbms/src/Storages/MergeTree/{MergeTreeReverseSelectBlockInputStream.h => MergeTreeReverseSelectProcessor.h} (82%) rename dbms/src/Storages/MergeTree/{MergeTreeSelectBlockInputStream.cpp => MergeTreeSelectProcessor.cpp} (85%) rename dbms/src/Storages/MergeTree/{MergeTreeSelectBlockInputStream.h => MergeTreeSelectProcessor.h} (85%) rename dbms/src/Storages/MergeTree/{MergeTreeThreadSelectBlockInputStream.cpp => MergeTreeThreadSelectBlockInputProcessor.cpp} (84%) rename dbms/src/Storages/MergeTree/{MergeTreeThreadSelectBlockInputStream.h => MergeTreeThreadSelectBlockInputProcessor.h} (83%) diff --git a/dbms/src/Processors/Transforms/AddingConstColumnTransform.h b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h new file mode 100644 index 00000000000..aea9ee392b5 --- /dev/null +++ b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h @@ -0,0 +1,40 @@ +#pragma once +#include + +namespace DB +{ + +/// Adds a materialized const column to the chunk with a specified value. 
+template +class AddingConstColumnTransform : public ISimpleTransform +{ +public: + AddingConstColumnTransform(const Block & header, DataTypePtr data_type_, T value_, const String & column_name_) + : ISimpleTransform(header, addColumn(header, data_type_, column_name_), false) + , data_type(std::move(data_type_)), value(value_) {} + + String getName() const override { return "AddingConstColumnTransform"; } + +protected: + void transform(Chunk & chunk) override + { + auto num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + columns.emplace_back(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); + + chunk.setColumns(std::move(columns), num_rows); + } + +private: + static Block addColumn(Block header, const DataTypePtr & data_type, const String & column_name) + { + header.insert({data_type->createColumn(), data_type, column_name}); + return header; + } + + DataTypePtr data_type; + T value; +}; + +} diff --git a/dbms/src/Processors/Transforms/ReverseTransform.cpp b/dbms/src/Processors/Transforms/ReverseTransform.cpp new file mode 100644 index 00000000000..eb2b39d26d1 --- /dev/null +++ b/dbms/src/Processors/Transforms/ReverseTransform.cpp @@ -0,0 +1,22 @@ +#include + +namespace DB +{ + +void ReverseTransform::transform(Chunk & chunk) +{ + IColumn::Permutation permutation; + + size_t num_rows = chunk.getNumRows(); + for (size_t i = 0; i < num_rows; ++i) + permutation.emplace_back(num_rows - 1 - i); + + auto columns = chunk.detachColumns(); + + for (auto & column : columns) + column = column->permute(permutation, 0); + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/dbms/src/Processors/Transforms/ReverseTransform.h b/dbms/src/Processors/Transforms/ReverseTransform.h new file mode 100644 index 00000000000..2e3eca25648 --- /dev/null +++ b/dbms/src/Processors/Transforms/ReverseTransform.h @@ -0,0 +1,17 @@ +#pragma once +#include + +namespace DB +{ + +class ReverseTransform : public ISimpleTransform +{ +public: + explicit ReverseTransform(const Block & header) : ISimpleTransform(header, header, false) {} + String getName() const override { return "ReverseTransform"; } + +protected: + void transform(Chunk & chunk) override; +}; + +} diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp similarity index 84% rename from dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 1f899c6b592..0f03a2c8f57 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -19,7 +19,7 @@ namespace ErrorCodes } -MergeTreeBaseSelectBlockInputProcessor::MergeTreeBaseSelectBlockInputProcessor( +MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, @@ -47,7 +47,7 @@ MergeTreeBaseSelectBlockInputProcessor::MergeTreeBaseSelectBlockInputProcessor( } -Chunk MergeTreeBaseSelectBlockInputProcessor::generate() +Chunk MergeTreeBaseSelectProcessor::generate() { while (!isCancelled()) { @@ -67,7 +67,7 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::generate() } -void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeReadTask & current_task) +void MergeTreeBaseSelectProcessor::initializeRangeReaders(MergeTreeReadTask & 
current_task) { if (prewhere_info) { @@ -76,8 +76,8 @@ void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeRea current_task.range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, - &prewhere_info->prewhere_column_name, ¤t_task.ordered_names, - current_task.should_reorder, current_task.remove_prewhere_column, true); + &prewhere_info->prewhere_column_name, + current_task.remove_prewhere_column, true); } else { @@ -87,26 +87,26 @@ void MergeTreeBaseSelectBlockInputProcessor::initializeRangeReaders(MergeTreeRea current_task.pre_range_reader = MergeTreeRangeReader( pre_reader.get(), nullptr, prewhere_info->alias_actions, prewhere_info->prewhere_actions, - &prewhere_info->prewhere_column_name, ¤t_task.ordered_names, - current_task.should_reorder, current_task.remove_prewhere_column, false); + &prewhere_info->prewhere_column_name, + current_task.remove_prewhere_column, false); pre_reader_ptr = ¤t_task.pre_range_reader; } current_task.range_reader = MergeTreeRangeReader( reader.get(), pre_reader_ptr, nullptr, nullptr, - nullptr, ¤t_task.ordered_names, true, false, true); + nullptr, false, true); } } else { current_task.range_reader = MergeTreeRangeReader( reader.get(), nullptr, nullptr, nullptr, - nullptr, ¤t_task.ordered_names, current_task.should_reorder, false, true); + nullptr, false, true); } } -Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() +Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() { if (task->size_predictor) task->size_predictor->startBlock(); @@ -171,8 +171,7 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows; - /// TODO - /// progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); + /// TODO: progressImpl({ read_result.numReadRows(), read_result.numBytesRead() }); if (task->size_predictor) { @@ -182,11 +181,26 @@ Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPartImpl() task->size_predictor->update(sample_block, read_result.columns, read_result.num_rows); } - return Chunk(std::move(read_result.columns), read_result.num_rows); + if (read_result.num_rows == 0) + return {}; + + auto & header = getPort().getHeader(); + Columns ordered_columns; + size_t num_virtual_columns = virt_column_names.size(); + ordered_columns.reserve(header.columns() - num_virtual_columns); + + /// Reorder columns. TODO: maybe skip for default case. 
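+    /// (sample_block lists columns in the order they were read and filtered,
+    /// while the output header expects the originally requested order, so each
+    /// header column is looked up in sample_block by name.)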
+ for (size_t ps = 0; ps + num_virtual_columns < header.columns(); ++ps) + { + auto pos_in_sample_block = sample_block.getPositionByName(header.getByPosition(ps).name); + ordered_columns.emplace_back(std::move(read_result.columns[pos_in_sample_block])); + } + + return Chunk(std::move(ordered_columns), read_result.num_rows); } -Chunk MergeTreeBaseSelectBlockInputProcessor::readFromPart() +Chunk MergeTreeBaseSelectProcessor::readFromPart() { if (!task->range_reader.isInitialized()) initializeRangeReaders(*task); @@ -267,13 +281,13 @@ namespace }; } -void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns) +void MergeTreeBaseSelectProcessor::injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns) { InsertIntoBlockCallback callback { block }; injectVirtualColumnsImpl(block.rows(), callback, task, virtual_columns); } -void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns) +void MergeTreeBaseSelectProcessor::injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns) { UInt64 num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); @@ -284,7 +298,7 @@ void MergeTreeBaseSelectBlockInputProcessor::injectVirtualColumns(Chunk & chunk, chunk.setColumns(columns, num_rows); } -void MergeTreeBaseSelectBlockInputProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) +void MergeTreeBaseSelectProcessor::executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info) { if (prewhere_info) { @@ -300,7 +314,7 @@ void MergeTreeBaseSelectBlockInputProcessor::executePrewhereActions(Block & bloc } } -Block MergeTreeBaseSelectBlockInputProcessor::getHeader( +Block MergeTreeBaseSelectProcessor::getHeader( Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns) { executePrewhereActions(block, prewhere_info); @@ -309,6 +323,6 @@ Block MergeTreeBaseSelectBlockInputProcessor::getHeader( } -MergeTreeBaseSelectBlockInputProcessor::~MergeTreeBaseSelectBlockInputProcessor() = default; +MergeTreeBaseSelectProcessor::~MergeTreeBaseSelectProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h similarity index 93% rename from dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 0abbb2d001c..0197d481f13 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -16,10 +16,10 @@ class MarkCache; /// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream -class MergeTreeBaseSelectBlockInputProcessor : public ISource +class MergeTreeBaseSelectProcessor : public ISource { public: - MergeTreeBaseSelectBlockInputProcessor( + MergeTreeBaseSelectProcessor( Block header, const MergeTreeData & storage_, const PrewhereInfoPtr & prewhere_info_, @@ -32,7 +32,7 @@ public: bool save_marks_in_cache_ = true, const Names & virt_column_names_ = {}); - ~MergeTreeBaseSelectBlockInputProcessor() override; + ~MergeTreeBaseSelectProcessor() override; static void executePrewhereActions(Block & block, const PrewhereInfoPtr & prewhere_info); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp 
b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 95f76a4c7f7..af410c6ed14 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -5,10 +5,10 @@
 
 #include
-#include
+#include
 #include
-#include
+#include
 #include
 #include
 #include
@@ -53,7 +53,13 @@ namespace std
 #include
 #include
 #include
-
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 
 namespace ProfileEvents
@@ -624,18 +630,36 @@ Pipes MergeTreeDataSelectExecutor::readFromParts(
     }
 
     if (use_sampling)
-        for (auto & stream : res)
-            stream = std::make_shared<FilterBlockInputStream>(stream, filter_expression, filter_function->getColumnName());
+    {
+        for (auto & pipe : res)
+        {
+            auto & output = pipe.back()->getOutputs().front();
+            pipe.emplace_back(std::make_shared<FilterTransform>(output.getHeader(), filter_expression, filter_function->getColumnName(), false));
+            connect(output, pipe.back()->getInputs().front());
+        }
+    }
 
     /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
     if (sample_factor_column_queried)
-        for (auto & stream : res)
-            stream = std::make_shared<AddingConstColumnBlockInputStream<Float64>>(
-                stream, std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor");
+    {
+        for (auto & pipe : res)
+        {
+            auto & output = pipe.back()->getOutputs().front();
+            pipe.emplace_back(std::make_shared<AddingConstColumnTransform<Float64>>(
+                output.getHeader(), std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor"));
+            connect(output, pipe.back()->getInputs().front());
+        }
+    }
 
     if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions)
-        for (auto & stream : res)
-            stream = std::make_shared<ExpressionBlockInputStream>(stream, query_info.prewhere_info->remove_columns_actions);
+    {
+        for (auto & pipe : res)
+        {
+            auto & output = pipe.back()->getOutputs().front();
+            pipe.emplace_back(std::make_shared<ExpressionTransform>(
+                output.getHeader(), query_info.prewhere_info->remove_columns_actions));
+            connect(output, pipe.back()->getInputs().front());
+        }
+    }
 
     return res;
 }
@@ -724,15 +748,16 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
 
         for (size_t i = 0; i < num_streams; ++i)
        {
-            res.emplace_back(std::make_shared<MergeTreeThreadSelectBlockInputStream>(
+            res.push_back({std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
                 i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes,
                 settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache,
-                query_info.prewhere_info, settings, virt_columns));
+                query_info.prewhere_info, settings, virt_columns)});
 
             if (i == 0)
             {
                 /// Set the approximate number of rows for the first source only
-                res.front()->addTotalRowsApprox(total_rows);
+                /// TODO
+                /// res.front()->addTotalRowsApprox(total_rows);
             }
         }
     }
@@ -800,13 +825,13 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
                 parts.emplace_back(part);
             }
 
-            BlockInputStreamPtr source_stream = std::make_shared<MergeTreeSelectBlockInputStream>(
+            auto source_processor = std::make_shared<MergeTreeSelectProcessor>(
                 data, part.data_part, max_block_size, settings.preferred_block_size_bytes,
                 settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part,
                 use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io,
                 settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query);
 
-            res.push_back(source_stream);
+            res.push_back({std::move(source_processor)});
         }
     }
 
@@ -865,10 +890,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
     if (sum_marks > max_marks_to_use_cache)
         use_uncompressed_cache = false;
 
-    BlockInputStreams streams;
+    Pipes pipes;
 
     if (sum_marks == 0)
-        return streams;
+        return
pipes; /// Let's split ranges to avoid reading much data. auto split_ranges = [rows_granularity = data_settings->index_granularity, max_block_size](const auto & ranges, int direction) @@ -922,7 +947,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( { size_t need_marks = min_marks_per_stream; - BlockInputStreams streams_per_thread; + std::vector streams_per_thread; + Processors pipe; /// Loop over parts. /// We will iteratively take part or some subrange of a part from the back @@ -982,27 +1008,29 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( ranges_to_get_from_part = split_ranges(ranges_to_get_from_part, sorting_info->direction); - BlockInputStreamPtr source_stream; if (sorting_info->direction == 1) { - source_stream = std::make_shared( + pipe.push_back({std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); } else { - source_stream = std::make_shared( + pipe.push_back({std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); - source_stream = std::make_shared(source_stream); + auto & output = pipe.back()->getOutputs().front(); + auto reverse_processor = std::make_shared(output.getHeader()); + connect(output, reverse_processor->getInputs().front()); + pipe.emplace_back(std::move(reverse_processor)); } - streams_per_thread.push_back(source_stream); + streams_per_thread.emplace_back(&pipe.back()->getOutputs().front()); } if (streams_per_thread.size() > 1) @@ -1013,16 +1041,27 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( sorting_info->direction, 1); for (auto & stream : streams_per_thread) - stream = std::make_shared(stream, sorting_key_prefix_expr); + { + pipe.emplace_back(std::make_shared(stream->getHeader(), sorting_key_prefix_expr)); + connect(*stream, pipe.back()->getInputs().front()); + stream = &pipe.back()->getOutputs().front(); + } - streams.push_back(std::make_shared( - streams_per_thread, sort_description, max_block_size)); + pipe.push_back(std::make_shared( + streams_per_thread.back()->getHeader(), streams_per_thread.size(), sort_description, max_block_size)); + + auto it = streams_per_thread.begin(); + for (auto & input : pipe.back()->getInputs()) + { + connect(**it, input); + ++it; + } } - else - streams.push_back(streams_per_thread.at(0)); + + pipes.push_back(std::move(pipe)); } - return streams; + return pipes; } @@ -1060,7 +1099,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - BlockInputStreams to_merge; + Pipes pipes; + std::vector to_merge; /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL @@ -1068,13 +1108,20 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( { RangesInDataPart & part = 
parts[part_index]; - BlockInputStreamPtr source_stream = std::make_shared( + auto source_processor = std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - to_merge.emplace_back(std::make_shared(source_stream, data.sorting_key_expr)); + auto & output = source_processor->getPort(); + auto expression_transform = std::make_shared(output.getHeader(), data.sorting_key_expr); + connect(output, expression_transform->getInputPort()); + + to_merge.emplace_back(&expression_transform->getOutputPort()); + + Processors pipe { std::move(source_processor), std::move(expression_transform) }; + pipes.emplace_back(std::move(pipe)); } Names sort_columns = data.sorting_key_columns; @@ -1086,42 +1133,74 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); + auto streams_to_merge = [&]() + { + size_t num_streams = to_merge.size(); + + BlockInputStreams streams; + streams.reserve(num_streams); + + for (size_t i = 0; i < num_streams; ++i) + streams.emplace_back(std::make_shared(pipes[i])); + + pipes.clear(); + return streams; + }; + + ProcessorPtr merged_processor; BlockInputStreamPtr merged; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - merged = std::make_shared(to_merge, sort_description, max_block_size); + merged_processor = std::make_shared(header, to_merge.size(), sort_description, max_block_size); break; case MergeTreeData::MergingParams::Collapsing: merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column); + streams_to_merge(), sort_description, data.merging_params.sign_column); break; case MergeTreeData::MergingParams::Summing: - merged = std::make_shared(to_merge, + merged = std::make_shared(streams_to_merge(), sort_description, data.merging_params.columns_to_sum, max_block_size); break; case MergeTreeData::MergingParams::Aggregating: - merged = std::make_shared(to_merge, sort_description, max_block_size); + merged = std::make_shared(streams_to_merge(), sort_description, max_block_size); break; case MergeTreeData::MergingParams::Replacing: /// TODO Make ReplacingFinalBlockInputStream - merged = std::make_shared(to_merge, + merged = std::make_shared(streams_to_merge(), sort_description, data.merging_params.version_column, max_block_size); break; case MergeTreeData::MergingParams::VersionedCollapsing: /// TODO Make VersionedCollapsingFinalBlockInputStream merged = std::make_shared( - to_merge, sort_description, data.merging_params.sign_column, max_block_size); + streams_to_merge(), sort_description, data.merging_params.sign_column, max_block_size); break; case MergeTreeData::MergingParams::Graphite: throw Exception("GraphiteMergeTree doesn't support FINAL", ErrorCodes::LOGICAL_ERROR); } - return {merged}; + if (merged) + return {{std::make_shared(merged)}}; + + auto it = to_merge.begin(); + for (auto & input : merged_processor->getInputs()) + { + connect(**it, input); + ++it; + } + + Processors result; + result.reserve(2 * pipes.size() + 1); + for (auto & pipe : pipes) + for (auto & processor : pipe) + result.emplace_back(std::move(processor)); + + result.emplace_back(merged_processor); + return {result}; } 
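A condensed sketch of the fan-in built above for FINAL, assuming `to_merge`
holds one upstream OutputPort * per part chain and `header` is the common
stream header (simplified from spreadMarkRangesAmongStreamsFinal; not
compilable on its own):

    auto merge = std::make_shared<MergingSortedTransform>(
        header, to_merge.size(), sort_description, max_block_size);

    /// Pair each collected upstream output with one of the merge's inputs.
    auto it = to_merge.begin();
    for (auto & input : merge->getInputs())
    {
        connect(**it, input);
        ++it;
    }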
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 9b46b663ab2..bcb80ff9a37 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -17,7 +17,7 @@ class KeyCondition; class MergeTreeDataSelectExecutor { public: - MergeTreeDataSelectExecutor(const MergeTreeData & data_); + explicit MergeTreeDataSelectExecutor(const MergeTreeData & data_); /** When reading, selects a set of parts that covers the desired range of the index. * max_blocks_number_to_read - if not nullptr, do not read all the parts whose right border is greater than max_block in partition. diff --git a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp index 6298c098220..d308667a67b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include namespace ProfileEvents diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp similarity index 80% rename from dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 9b78517e742..ea250789dce 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include @@ -12,8 +12,27 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } +static Block replaceTypes(Block && header, const MergeTreeData::DataPartPtr & data_part) +{ + /// Types may be different during ALTER (when this stream is used to perform an ALTER). + /// NOTE: We may use similar code to implement non blocking ALTERs. 
+    for (const auto & name_type : data_part->columns)
+    {
+        if (header.has(name_type.name))
+        {
+            auto & elem = header.getByName(name_type.name);
+            if (!elem.type->equals(*name_type.type))
+            {
+                elem.type = name_type.type;
+                elem.column = elem.type->createColumn();
+            }
+        }
+    }
 
-MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
+    return std::move(header);
+}
+
+MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
     const MergeTreeData & storage_,
     const MergeTreeData::DataPartPtr & owned_data_part_,
     UInt64 max_block_size_rows_,
@@ -31,7 +50,9 @@ MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
     size_t part_index_in_query_,
     bool quiet)
     :
-    MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_,
+    MergeTreeBaseSelectProcessor{
+        replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_),
+        storage_, prewhere_info_, max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
         min_bytes_to_use_direct_io_, max_read_buffer_size_,
         use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_},
     required_columns{required_columns_},
@@ -55,28 +76,12 @@ MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
         : "")
     << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
 
-    addTotalRowsApprox(total_rows);
-    header = storage.getSampleBlockForColumns(required_columns);
+    /// TODO
+    /// addTotalRowsApprox(total_rows);
 
-    /// Types may be different during ALTER (when this stream is used to perform an ALTER).
-    /// NOTE: We may use similar code to implement non blocking ALTERs.
-    for (const auto & name_type : data_part->columns)
-    {
-        if (header.has(name_type.name))
-        {
-            auto & elem = header.getByName(name_type.name);
-            if (!elem.type->equals(*name_type.type))
-            {
-                elem.type = name_type.type;
-                elem.column = elem.type->createColumn();
-            }
-        }
-    }
-
-    executePrewhereActions(header, prewhere_info);
-    injectVirtualColumns(header);
-
-    ordered_names = getHeader().getNames();
+    ordered_names = getPort().getHeader().getNames();
+    /// Remove virtual columns.
+    ordered_names.resize(ordered_names.size() - virt_column_names.size());
 
     task_columns = getReadTaskColumns(storage, data_part, required_columns,
         prewhere_info, check_columns);
@@ -101,17 +106,10 @@ MergeTreeReverseSelectBlockInputStream::MergeTreeReverseSelectBlockInputStream(
         all_mark_ranges, min_bytes_to_use_direct_io, max_read_buffer_size);
 }
 
-
-Block MergeTreeReverseSelectBlockInputStream::getHeader() const
-{
-    return header;
-}
-
-
-bool MergeTreeReverseSelectBlockInputStream::getNewTask()
+bool MergeTreeReverseSelectProcessor::getNewTask()
 try
 {
-    if ((blocks.empty() && all_mark_ranges.empty()) || total_marks_count == 0)
+    if ((chunks.empty() && all_mark_ranges.empty()) || total_marks_count == 0)
     {
         finish();
         return false;
@@ -145,14 +143,14 @@ catch (...)
throw; } -Block MergeTreeReverseSelectBlockInputStream::readFromPart() +Chunk MergeTreeReverseSelectProcessor::readFromPart() { - Block res; + Chunk res; - if (!blocks.empty()) + if (!chunks.empty()) { - res = std::move(blocks.back()); - blocks.pop_back(); + res = std::move(chunks.back()); + chunks.pop_back(); return res; } @@ -161,20 +159,20 @@ Block MergeTreeReverseSelectBlockInputStream::readFromPart() while (!task->isFinished()) { - Block block = readFromPartImpl(); - blocks.push_back(std::move(block)); + Chunk chunk = readFromPartImpl(); + chunks.push_back(std::move(chunk)); } - if (blocks.empty()) + if (chunks.empty()) return {}; - res = std::move(blocks.back()); - blocks.pop_back(); + res = std::move(chunks.back()); + chunks.pop_back(); return res; } -void MergeTreeReverseSelectBlockInputStream::finish() +void MergeTreeReverseSelectProcessor::finish() { /** Close the files (before destroying the object). * When many sources are created, but simultaneously reading only a few of them, @@ -186,6 +184,6 @@ void MergeTreeReverseSelectBlockInputStream::finish() data_part.reset(); } -MergeTreeReverseSelectBlockInputStream::~MergeTreeReverseSelectBlockInputStream() = default; +MergeTreeReverseSelectProcessor::~MergeTreeReverseSelectProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h similarity index 82% rename from dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h index 40af5d5d92a..dcba0ca5e36 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include @@ -13,10 +13,10 @@ namespace DB /// Used to read data from single part with select query /// Cares about PREWHERE, virtual columns, indexes etc. /// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. 
-class MergeTreeReverseSelectBlockInputStream : public MergeTreeBaseSelectBlockInputStream +class MergeTreeReverseSelectProcessor : public MergeTreeBaseSelectProcessor { public: - MergeTreeReverseSelectBlockInputStream( + MergeTreeReverseSelectProcessor( const MergeTreeData & storage, const MergeTreeData::DataPartPtr & owned_data_part, UInt64 max_block_size_rows, @@ -34,19 +34,17 @@ public: size_t part_index_in_query = 0, bool quiet = false); - ~MergeTreeReverseSelectBlockInputStream() override; + ~MergeTreeReverseSelectProcessor() override; String getName() const override { return "MergeTreeReverse"; } - Block getHeader() const override; - /// Closes readers and unlock part locks void finish(); protected: bool getNewTask() override; - Block readFromPart() override; + Chunk readFromPart() override; private: Block header; @@ -73,9 +71,9 @@ private: String path; - Blocks blocks; + Chunks chunks; - Logger * log = &Logger::get("MergeTreeReverseSelectBlockInputStream"); + Logger * log = &Logger::get("MergeTreeReverseSelectProcessor"); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp similarity index 85% rename from dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 7a6e6f197dd..2a28cb9f738 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -1,5 +1,5 @@ -#include -#include +#include +#include #include #include @@ -12,8 +12,27 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } +static Block replaceTypes(Block && header, const MergeTreeData::DataPartPtr & data_part) +{ + /// Types may be different during ALTER (when this stream is used to perform an ALTER). + /// NOTE: We may use similar code to implement non blocking ALTERs. 
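+    /// Concretely: for every column present both in the passed header and in the part,
+    /// prefer the type the part actually stores on disk and recreate an empty column of
+    /// that type, so the header stays consistent with what the readers will produce.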
+ for (const auto & name_type : data_part->columns) + { + if (header.has(name_type.name)) + { + auto & elem = header.getByName(name_type.name); + if (!elem.type->equals(*name_type.type)) + { + elem.type = name_type.type; + elem.column = elem.type->createColumn(); + } + } + } -MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream( + return std::move(header); +} + +MergeTreeSelectProcessor::MergeTreeSelectProcessor( const MergeTreeData & storage_, const MergeTreeData::DataPartPtr & owned_data_part_, UInt64 max_block_size_rows_, @@ -31,10 +50,12 @@ MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream( size_t part_index_in_query_, bool quiet) : - MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_, + MergeTreeBaseSelectProcessor{ + replaceTypes(storage_.getSampleBlockForColumns(required_columns), owned_data_part_), + storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_, max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_}, - required_columns{required_columns_}, + required_columns{std::move(required_columns_)}, data_part{owned_data_part_}, part_columns_lock(data_part->columns_lock), all_mark_ranges(mark_ranges_), @@ -56,39 +77,16 @@ MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream( : "") << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin)); - addTotalRowsApprox(total_rows); + /// TODO + /// addTotalRowsApprox(total_rows); - header = storage.getSampleBlockForColumns(required_columns); - - /// Types may be different during ALTER (when this stream is used to perform an ALTER). - /// NOTE: We may use similar code to implement non blocking ALTERs. - for (const auto & name_type : data_part->columns) - { - if (header.has(name_type.name)) - { - auto & elem = header.getByName(name_type.name); - if (!elem.type->equals(*name_type.type)) - { - elem.type = name_type.type; - elem.column = elem.type->createColumn(); - } - } - } - - executePrewhereActions(header, prewhere_info); - injectVirtualColumns(header); - - ordered_names = getHeader().getNames(); + ordered_names = getPort().getHeader().getNames(); + /// Remove virtual columns. + ordered_names.resize(ordered_names.size() - virt_column_names.size()); } -Block MergeTreeSelectBlockInputStream::getHeader() const -{ - return header; -} - - -bool MergeTreeSelectBlockInputStream::getNewTask() +bool MergeTreeSelectProcessor::getNewTask() try { /// Produce no more than one task @@ -149,7 +147,7 @@ catch (...) } -void MergeTreeSelectBlockInputStream::finish() +void MergeTreeSelectProcessor::finish() { /** Close the files (before destroying the object). 
* When many sources are created, but simultaneously reading only a few of them, @@ -162,7 +160,7 @@ void MergeTreeSelectBlockInputStream::finish() } -MergeTreeSelectBlockInputStream::~MergeTreeSelectBlockInputStream() = default; +MergeTreeSelectProcessor::~MergeTreeSelectProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h similarity index 85% rename from dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 0fc9830f5d0..0551d966481 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include @@ -13,10 +13,10 @@ namespace DB /// Used to read data from single part with select query /// Cares about PREWHERE, virtual columns, indexes etc. /// To read data from multiple parts, Storage (MergeTree) creates multiple such objects. -class MergeTreeSelectBlockInputStream : public MergeTreeBaseSelectBlockInputStream +class MergeTreeSelectProcessor : public MergeTreeBaseSelectProcessor { public: - MergeTreeSelectBlockInputStream( + MergeTreeSelectProcessor( const MergeTreeData & storage, const MergeTreeData::DataPartPtr & owned_data_part, UInt64 max_block_size_rows, @@ -34,12 +34,10 @@ public: size_t part_index_in_query = 0, bool quiet = false); - ~MergeTreeSelectBlockInputStream() override; + ~MergeTreeSelectProcessor() override; String getName() const override { return "MergeTree"; } - Block getHeader() const override; - /// Closes readers and unlock part locks void finish(); @@ -48,7 +46,6 @@ protected: bool getNewTask() override; private: - Block header; /// Used by Task Names required_columns; @@ -74,7 +71,7 @@ private: String path; bool is_first_task = true; - Logger * log = &Logger::get("MergeTreeSelectBlockInputStream"); + Logger * log = &Logger::get("MergeTreeSelectProcessor"); }; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp index 74cff479e5f..96e4d89ca84 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp @@ -91,23 +91,32 @@ try { size_t rows_to_read = data_part->index_granularity.getMarkRows(current_mark); bool continue_reading = (current_mark != 0); - size_t rows_readed = reader->readRows(current_mark, continue_reading, rows_to_read, res); - if (res) + auto & sample = reader->getColumns(); + Columns columns(sample.size()); + size_t rows_readed = reader->readRows(current_mark, continue_reading, rows_to_read, columns); + + if (rows_readed) { - res.checkNumberOfRows(); - current_row += rows_readed; current_mark += (rows_to_read == rows_readed); - bool should_reorder = false, should_evaluate_missing_defaults = false; - reader->fillMissingColumns(res, should_reorder, should_evaluate_missing_defaults, res.rows()); + bool should_evaluate_missing_defaults = false; + reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_readed); if (should_evaluate_missing_defaults) - reader->evaluateMissingDefaults(res); + reader->evaluateMissingDefaults(columns); - if (should_reorder) - reader->reorderColumns(res, header.getNames(), nullptr); + /// Reorder columns and fill result block. 
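+            /// `columns` is in the order of `sample` (the reader's column list), so walking
+            /// both in lockstep rebuilds the block with matching names and types.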
+ size_t num_columns = sample.size(); + auto it = sample.begin(); + for (size_t i = 0; i < num_columns; ++i) + { + res.insert({columns[i], it->type, it->name}); + ++it; + } + + res.checkNumberOfRows(); } } else diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp similarity index 84% rename from dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp rename to dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index cd6efa6b7d1..78122c53ac1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -1,13 +1,13 @@ #include #include -#include +#include namespace DB { -MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( +MergeTreeThreadSelectBlockInputProcessor::MergeTreeThreadSelectBlockInputProcessor( const size_t thread_, const MergeTreeReadPoolPtr & pool_, const size_t min_marks_to_read_, @@ -20,11 +20,11 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( const Settings & settings, const Names & virt_column_names_) : - MergeTreeBaseSelectBlockInputProcessor{pool->getHeader(), storage_, prewhere_info_, max_block_size_rows_, - preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, - thread{thread_}, - pool{pool_} + MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, + preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, + settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, + thread{thread_}, + pool{pool_} { /// round min_marks_to_read up to nearest multiple of block_size expressed in marks /// If granularity is adaptive it doesn't make sense @@ -42,7 +42,7 @@ MergeTreeThreadSelectBlockInputStream::MergeTreeThreadSelectBlockInputStream( } /// Requests read task from MergeTreeReadPool and signals whether it got one -bool MergeTreeThreadSelectBlockInputStream::getNewTask() +bool MergeTreeThreadSelectBlockInputProcessor::getNewTask() { task = pool->getTask(min_marks_to_read, thread, ordered_names); @@ -105,6 +105,6 @@ bool MergeTreeThreadSelectBlockInputStream::getNewTask() } -MergeTreeThreadSelectBlockInputStream::~MergeTreeThreadSelectBlockInputStream() = default; +MergeTreeThreadSelectBlockInputProcessor::~MergeTreeThreadSelectBlockInputProcessor() = default; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h similarity index 83% rename from dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h rename to dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h index 9603d21fb33..fa760e319cb 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.h +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h @@ -1,5 +1,5 @@ #pragma once -#include +#include namespace DB @@ -11,10 +11,10 @@ class MergeTreeReadPool; /** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked * to perform. 
*/ -class MergeTreeThreadSelectBlockInputStream : public MergeTreeBaseSelectBlockInputProcessor +class MergeTreeThreadSelectBlockInputProcessor : public MergeTreeBaseSelectProcessor { public: - MergeTreeThreadSelectBlockInputStream( + MergeTreeThreadSelectBlockInputProcessor( const size_t thread_, const std::shared_ptr & pool_, const size_t min_marks_to_read_, @@ -29,7 +29,7 @@ public: String getName() const override { return "MergeTreeThread"; } - ~MergeTreeThreadSelectBlockInputStream() override; + ~MergeTreeThreadSelectBlockInputProcessor() override; protected: /// Requests read task from MergeTreeReadPool and signals whether it got one diff --git a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 0e15a5660a9..37a3b931fa8 100644 --- a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -28,8 +29,16 @@ public: size_t max_block_size, unsigned num_streams) override { - return MergeTreeDataSelectExecutor(part->storage).readFromParts( - {part}, column_names, query_info, context, max_block_size, num_streams); + auto pipes = MergeTreeDataSelectExecutor(part->storage).readFromParts( + {part}, column_names, query_info, context, max_block_size, num_streams); + + BlockInputStreams streams; + streams.reserve(pipes.size()); + + for (auto & pipe : pipes) + streams.emplace_back(std::make_shared(std::move(pipe))); + + return streams; } bool supportsIndexForIn() const override { return true; } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index e5821c1bcaf..4b07e03dcba 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2952,7 +2952,7 @@ StorageReplicatedMergeTree::~StorageReplicatedMergeTree() } -BlockInputStreams StorageReplicatedMergeTree::read( +Pipes StorageReplicatedMergeTree::readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index c5000944439..4378b9fc23b 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -87,7 +87,7 @@ public: bool supportsReplication() const override { return true; } bool supportsDeduplication() const override { return true; } - BlockInputStreams read( + Pipes readWithProcessors( const Names & column_names, const SelectQueryInfo & query_info, const Context & context, From 54d32da5a180f860fa631705396291322e659d1d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 1 Oct 2019 21:30:23 +0300 Subject: [PATCH 013/222] Update TreeExecutor. --- dbms/src/Processors/Executors/TreeExecutor.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index d7fc1b78ede..667c3a0e565 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -49,6 +49,8 @@ static void validateTree(const Processors & processors, IProcessor * root) throw Exception("Processor with name " + node->getName() + " was visited twice while traverse in TreeExecutor. 
" "Passed processors are not tree.", ErrorCodes::LOGICAL_ERROR); + is_visited[position] = true; + checkProcessorHasSingleOutput(node); auto & children = node->getInputs(); @@ -72,6 +74,7 @@ void TreeExecutor::init() validateTree(processors, root); port = std::make_unique(getHeader(), root); + connect(root->getOutputs().front(), *port); port->setNeeded(); } From e48f7faebc1776c1a73d2f7767e87f2e05f0d7cf Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 2 Oct 2019 14:57:17 +0300 Subject: [PATCH 014/222] Fix MergeTreeRangeReader. Fix MergeTreeReader. Fix MergeTreeBaseSelectProcessor. Better exception message for TreeExecutor. Added header_without_virtual_columns to MergeTreeBaseSelectProcessor. Fix MergeTreeReverseSelectProcessor. Fix MergeTreeDataSelectExecutor. --- .../src/Processors/Executors/TreeExecutor.cpp | 15 ++++++- .../MergeTreeBaseSelectProcessor.cpp | 15 ++++--- .../MergeTree/MergeTreeBaseSelectProcessor.h | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 1 + .../MergeTree/MergeTreeRangeReader.cpp | 45 +++++++++++++------ .../Storages/MergeTree/MergeTreeReader.cpp | 11 +++-- dbms/src/Storages/MergeTree/MergeTreeReader.h | 2 +- .../MergeTreeReverseSelectProcessor.cpp | 6 +-- .../MergeTree/MergeTreeSelectProcessor.cpp | 6 +-- .../MergeTreeSequentialBlockInputStream.cpp | 2 +- 10 files changed, 68 insertions(+), 36 deletions(-) diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index 667c3a0e565..593d455e672 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -83,11 +83,24 @@ void TreeExecutor::execute() std::stack stack; stack.push(root); + auto prepare_processor = [](IProcessor * processor) + { + try + { + return processor->prepare(); + } + catch (Exception & exception) + { + exception.addMessage(" While executing processor " + processor->getName()); + throw; + } + }; + while (!stack.empty()) { IProcessor * node = stack.top(); - auto status = node->prepare(); + auto status = prepare_processor(node); switch (status) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 0f03a2c8f57..77f33dce01d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -44,6 +44,11 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( save_marks_in_cache(save_marks_in_cache_), virt_column_names(virt_column_names_) { + header_without_virtual_columns = getPort().getHeader(); + + for (auto it = virt_column_names.rbegin(); it != virt_column_names.rend(); ++it) + if (header_without_virtual_columns.has(*it)) + header_without_virtual_columns.erase(*it); } @@ -161,7 +166,7 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() if (read_result.num_rows == 0) read_result.columns.clear(); - auto & sample_block = getPort().getHeader(); + auto & sample_block = task->range_reader.getSampleBlock(); if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) throw Exception("Inconsistent number of columns got from MergeTreeRangeReader. 
" "Have " + toString(sample_block.columns()) + " in sample block " @@ -184,15 +189,13 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl() if (read_result.num_rows == 0) return {}; - auto & header = getPort().getHeader(); Columns ordered_columns; - size_t num_virtual_columns = virt_column_names.size(); - ordered_columns.reserve(header.columns() - num_virtual_columns); + ordered_columns.reserve(header_without_virtual_columns.columns()); /// Reorder columns. TODO: maybe skip for default case. - for (size_t ps = 0; ps + num_virtual_columns < header.columns(); ++ps) + for (size_t ps = 0; ps < header_without_virtual_columns.columns(); ++ps) { - auto pos_in_sample_block = sample_block.getPositionByName(header.getByPosition(ps).name); + auto pos_in_sample_block = sample_block.getPositionByName(header_without_virtual_columns.getByPosition(ps).name); ordered_columns.emplace_back(std::move(read_result.columns[pos_in_sample_block])); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 0197d481f13..db369a5f267 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -70,6 +70,7 @@ protected: bool save_marks_in_cache; Names virt_column_names; + Block header_without_virtual_columns; std::unique_ptr task; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index af410c6ed14..7df4178f58f 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -658,6 +658,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( auto & output = pipe.back()->getOutputs().front(); pipe.emplace_back(std::make_shared( output.getHeader(), query_info.prewhere_info->remove_columns_actions)); + connect(output, pipe.back()->getInputs().front()); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 8cac9fcfad8..ec554d72339 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -530,15 +530,13 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows); } + if (!columns.empty() && should_evaluate_missing_defaults) + merge_tree_reader->evaluateMissingDefaults( + prev_reader->getSampleBlock().cloneWithColumns(read_result.columns), columns); + read_result.columns.reserve(read_result.columns.size() + columns.size()); for (auto & column : columns) read_result.columns.emplace_back(std::move(column)); - - if (!read_result.columns.empty()) - { - if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.columns); - } } else { @@ -552,7 +550,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar read_result.num_rows); if (should_evaluate_missing_defaults) - merge_tree_reader->evaluateMissingDefaults(read_result.columns); + merge_tree_reader->evaluateMissingDefaults({}, read_result.columns); } else read_result.columns.clear(); @@ -691,8 +689,18 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r { /// Restore block from columns list. 
Block block; - auto name_and_type = header.begin(); - for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) + size_t pos = 0; + + if (prev_reader) + { + for (auto & col : prev_reader->getSampleBlock()) + { + block.insert({result.columns[pos], col.type, col.name}); + ++pos; + } + } + + for (auto name_and_type = header.begin(); pos < num_columns; ++pos, ++name_and_type) block.insert({result.columns[pos], name_and_type->type, name_and_type->name}); if (alias_actions) @@ -703,7 +711,7 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r prewhere_column_pos = block.getPositionByName(*prewhere_column_name); result.columns.clear(); - result.columns.resize(block.columns()); + result.columns.reserve(block.columns()); for (auto & col : block) result.columns.emplace_back(std::move(col.column)); @@ -761,10 +769,21 @@ void MergeTreeRangeReader::executePrewhereActionsAndFilterColumns(ReadResult & r { filterColumns(result.columns, *filter_description.data); - if (result.columns.empty()) + /// Get num rows after filtration. + bool has_column = false; + + for (auto & column : result.columns) + { + if (column) + { + has_column = true; + result.num_rows = column->size(); + break; + } + } + + if (!has_column) result.num_rows = getNumBytesInFilter(); - else - result.num_rows = result.columns[0]->size(); } } diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index cab963d0f66..29d1dac7587 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -324,7 +324,7 @@ void MergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_ev if (res_columns[i] && arrayHasNoElementsRead(*res_columns[i])) res_columns[i] = nullptr; - if (res_columns[i]) + if (res_columns[i] == nullptr) { if (storage.getColumns().hasDefault(name)) { @@ -362,7 +362,7 @@ void MergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_ev } } -void MergeTreeReader::evaluateMissingDefaults(Columns & res_columns) +void MergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns & res_columns) { try { @@ -375,22 +375,21 @@ void MergeTreeReader::evaluateMissingDefaults(Columns & res_columns) /// Convert columns list to block. /// TODO: rewrite with columns interface. It wll be possible after changes in ExpressionActions. - Block block; auto name_and_type = columns.begin(); for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) { if (res_columns[pos] == nullptr) continue; - block.insert({res_columns[pos], name_and_type->type, name_and_type->name}); + additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name}); } - DB::evaluateMissingDefaults(block, columns, storage.getColumns().getDefaults(), storage.global_context); + DB::evaluateMissingDefaults(additional_columns, columns, storage.getColumns().getDefaults(), storage.global_context); /// Move columns from block. 
name_and_type = columns.begin(); for (size_t pos = 0; pos < num_columns; ++pos, ++name_and_type) - res_columns[pos] = std::move(block.getByName(name_and_type->name).column); + res_columns[pos] = std::move(additional_columns.getByName(name_and_type->name).column); } catch (Exception & e) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.h b/dbms/src/Storages/MergeTree/MergeTreeReader.h index 367f1bbb530..a690e56155a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.h @@ -42,7 +42,7 @@ public: /// num_rows is needed in case if all res_columns are nullptr. void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows); /// Evaluate defaulted columns if necessary. - void evaluateMissingDefaults(Columns & res_columns); + void evaluateMissingDefaults(Block additional_columns, Columns & res_columns); const NamesAndTypesList & getColumns() const { return columns; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index ea250789dce..ff8d599135b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -51,7 +51,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - replaceTypes(storage_.getSampleBlockForColumns(required_columns), owned_data_part_), + replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_), storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_, max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_}, @@ -79,9 +79,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( /// TODO /// addTotalRowsApprox(total_rows); - ordered_names = getPort().getHeader().getNames(); - /// Remove virtual columns. - ordered_names.resize(ordered_names.size() - virt_column_names.size()); + ordered_names = header_without_virtual_columns.getNames(); task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns); diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 2a28cb9f738..04954d6ff82 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -51,7 +51,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - replaceTypes(storage_.getSampleBlockForColumns(required_columns), owned_data_part_), + replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_), storage_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_, max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_}, @@ -80,9 +80,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( /// TODO /// addTotalRowsApprox(total_rows); - ordered_names = getPort().getHeader().getNames(); - /// Remove virtual columns. 
- ordered_names.resize(ordered_names.size() - virt_column_names.size()); + ordered_names = header_without_virtual_columns.getNames(); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp index 96e4d89ca84..eeeb07f1c26 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp @@ -105,7 +105,7 @@ try reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_readed); if (should_evaluate_missing_defaults) - reader->evaluateMissingDefaults(columns); + reader->evaluateMissingDefaults({}, columns); /// Reorder columns and fill result block. size_t num_columns = sample.size(); From 627d48c19a0fc7e7149a2ec82ea54fb650a37ed5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 3 Oct 2019 21:27:11 +0300 Subject: [PATCH 015/222] Added ExecutionSpeedLimits. --- dbms/src/DataStreams/ExecutionSpeedLimits.cpp | 86 +++++++++++++++ dbms/src/DataStreams/ExecutionSpeedLimits.h | 27 +++++ dbms/src/DataStreams/IBlockInputStream.cpp | 102 ++---------------- dbms/src/DataStreams/IBlockInputStream.h | 12 +-- dbms/src/DataStreams/SizeLimits.cpp | 11 +- dbms/src/DataStreams/SizeLimits.h | 1 + .../Interpreters/InterpreterSelectQuery.cpp | 12 +-- .../Transforms/LimitsCheckingTransform.cpp | 6 +- .../Storages/Kafka/KafkaBlockInputStream.cpp | 2 +- dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- 10 files changed, 144 insertions(+), 117 deletions(-) create mode 100644 dbms/src/DataStreams/ExecutionSpeedLimits.cpp create mode 100644 dbms/src/DataStreams/ExecutionSpeedLimits.h diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.cpp b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp new file mode 100644 index 00000000000..8886ca4b2b8 --- /dev/null +++ b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp @@ -0,0 +1,86 @@ +#include + +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event ThrottlerSleepMicroseconds; +} + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TOO_SLOW; +} + +static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds) +{ + /// How much time to wait for the average speed to become `max_speed_in_seconds`. + UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds; + + if (desired_microseconds > total_elapsed_microseconds) + { + UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds; + + /// Never sleep more than one second (it should be enough to limit speed for a reasonable amount, and otherwise it's too easy to make query hang). 
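+        /// Worked example: total_progress_size = 1000000 rows at max_speed_in_seconds = 100000
+        /// rows/sec gives desired_microseconds = 10 sec; if only 2 sec have elapsed, the 8 sec
+        /// of pending sleep is clamped to 1 sec per call, so the limit converges over repeated calls.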
+        sleep_microseconds = std::min(UInt64(1000000), sleep_microseconds);
+
+        sleepForMicroseconds(sleep_microseconds);
+
+        ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds);
+    }
+}
+
+void ExecutionSpeedLimits::throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds)
+{
+    if ((min_execution_speed || max_execution_speed || min_execution_speed_bytes ||
+         max_execution_speed_bytes || (total_rows && timeout_before_checking_execution_speed != 0)) &&
+        (static_cast<Int64>(total_elapsed_microseconds) > timeout_before_checking_execution_speed.totalMicroseconds()))
+    {
+        /// Do not count sleeps in throttlers
+        UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
+
+        double elapsed_seconds = 0;
+        if (total_elapsed_microseconds > throttler_sleep_microseconds)
+            elapsed_seconds = static_cast<double>(total_elapsed_microseconds - throttler_sleep_microseconds) / 1000000.0;
+
+        if (elapsed_seconds > 0)
+        {
+            if (min_execution_speed && read_rows / elapsed_seconds < min_execution_speed)
+                throw Exception("Query is executing too slow: " + toString(read_rows / elapsed_seconds)
+                                + " rows/sec., minimum: " + toString(min_execution_speed),
+                                ErrorCodes::TOO_SLOW);
+
+            if (min_execution_speed_bytes && read_bytes / elapsed_seconds < min_execution_speed_bytes)
+                throw Exception("Query is executing too slow: " + toString(read_bytes / elapsed_seconds)
+                                + " bytes/sec., minimum: " + toString(min_execution_speed_bytes),
+                                ErrorCodes::TOO_SLOW);
+
+            /// If the predicted execution time is longer than `max_execution_time`.
+            if (max_execution_time != 0 && total_rows && read_rows)
+            {
+                double estimated_execution_time_seconds = elapsed_seconds * (static_cast<double>(total_rows) / read_rows);
+
+                if (estimated_execution_time_seconds > max_execution_time.totalSeconds())
+                    throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)"
+                                    + " is too long. Maximum: " + toString(max_execution_time.totalSeconds())
+                                    + ". Estimated rows to process: " + toString(total_rows),
+                                    ErrorCodes::TOO_SLOW);
+            }
+
+            if (max_execution_speed && read_rows / elapsed_seconds >= max_execution_speed)
+                limitProgressingSpeed(read_rows, max_execution_speed, total_elapsed_microseconds);
+
+            if (max_execution_speed_bytes && read_bytes / elapsed_seconds >= max_execution_speed_bytes)
+                limitProgressingSpeed(read_bytes, max_execution_speed_bytes, total_elapsed_microseconds);
+        }
+    }
+}
+
+}
diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.h b/dbms/src/DataStreams/ExecutionSpeedLimits.h
new file mode 100644
index 00000000000..67627cb36bf
--- /dev/null
+++ b/dbms/src/DataStreams/ExecutionSpeedLimits.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include <Poco/Timespan.h>
+#include <Core/Types.h>
+
+namespace DB
+{
+
+/// Limits for query execution speed.
+/// In rows per second.
+class ExecutionSpeedLimits
+{
+public:
+    size_t min_execution_speed = 0;
+    size_t max_execution_speed = 0;
+    size_t min_execution_speed_bytes = 0;
+    size_t max_execution_speed_bytes = 0;
+
+    Poco::Timespan max_execution_time = 0;
+    /// Verify that the speed is not too low after the specified time has elapsed.
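+    /// Before this moment no speed checks are performed at all, which keeps short
+    /// queries from being throttled or aborted spuriously.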
+ Poco::Timespan timeout_before_checking_execution_speed = 0; + + void throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds); +}; + +} + diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp index a2c3fb2247c..92bdc559a95 100644 --- a/dbms/src/DataStreams/IBlockInputStream.cpp +++ b/dbms/src/DataStreams/IBlockInputStream.cpp @@ -214,11 +214,11 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co bool IBlockInputStream::checkTimeLimit() { - if (limits.max_execution_time != 0 - && info.total_stopwatch.elapsed() > static_cast(limits.max_execution_time.totalMicroseconds()) * 1000) + if (limits.speed_limit.max_execution_time != 0 + && info.total_stopwatch.elapsed() > static_cast(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000) return handleOverflowMode(limits.timeout_overflow_mode, "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds()) - + " seconds, maximum: " + toString(limits.max_execution_time.totalMicroseconds() / 1000000.0), + + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0), ErrorCodes::TIMEOUT_EXCEEDED); return true; @@ -247,24 +247,6 @@ void IBlockInputStream::checkQuota(Block & block) } } -static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds) -{ - /// How much time to wait for the average speed to become `max_speed_in_seconds`. - UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds; - - if (desired_microseconds > total_elapsed_microseconds) - { - UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds; - - /// Never sleep more than one second (it should be enough to limit speed for a reasonable amount, and otherwise it's too easy to make query hang). - sleep_microseconds = std::min(UInt64(1000000), sleep_microseconds); - - sleepForMicroseconds(sleep_microseconds); - - ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds); - } -} - void IBlockInputStream::progressImpl(const Progress & value) { @@ -284,40 +266,11 @@ void IBlockInputStream::progressImpl(const Progress & value) /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read. * NOTE: Maybe it makes sense to have them checked directly in ProcessList? */ - - if (limits.mode == LIMITS_TOTAL - && ((limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows) - || (limits.size_limits.max_bytes && progress.read_bytes > limits.size_limits.max_bytes))) + if (limits.mode == LIMITS_TOTAL) { - switch (limits.size_limits.overflow_mode) - { - case OverflowMode::THROW: - { - if (limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows) - throw Exception("Limit for rows to read exceeded: " + toString(total_rows_estimate) - + " rows read (or to read), maximum: " + toString(limits.size_limits.max_rows), - ErrorCodes::TOO_MANY_ROWS); - else - throw Exception("Limit for (uncompressed) bytes to read exceeded: " + toString(progress.read_bytes) - + " bytes read, maximum: " + toString(limits.size_limits.max_bytes), - ErrorCodes::TOO_MANY_BYTES); - } - - case OverflowMode::BREAK: - { - /// For `break`, we will stop only if so many rows were actually read, and not just supposed to be read. 
- if ((limits.size_limits.max_rows && progress.read_rows > limits.size_limits.max_rows) - || (limits.size_limits.max_bytes && progress.read_bytes > limits.size_limits.max_bytes)) - { - cancel(false); - } - - break; - } - - default: - throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR); - } + if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read", + ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES)) + cancel(false); } size_t total_rows = progress.total_rows_to_read; @@ -331,46 +284,7 @@ void IBlockInputStream::progressImpl(const Progress & value) last_profile_events_update_time = total_elapsed_microseconds; } - if ((limits.min_execution_speed || limits.max_execution_speed || limits.min_execution_speed_bytes || - limits.max_execution_speed_bytes || (total_rows && limits.timeout_before_checking_execution_speed != 0)) && - (static_cast(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds())) - { - /// Do not count sleeps in throttlers - UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds]; - double elapsed_seconds = (throttler_sleep_microseconds > total_elapsed_microseconds) - ? 0.0 : (total_elapsed_microseconds - throttler_sleep_microseconds) / 1000000.0; - - if (elapsed_seconds > 0) - { - if (limits.min_execution_speed && progress.read_rows / elapsed_seconds < limits.min_execution_speed) - throw Exception("Query is executing too slow: " + toString(progress.read_rows / elapsed_seconds) - + " rows/sec., minimum: " + toString(limits.min_execution_speed), - ErrorCodes::TOO_SLOW); - - if (limits.min_execution_speed_bytes && progress.read_bytes / elapsed_seconds < limits.min_execution_speed_bytes) - throw Exception("Query is executing too slow: " + toString(progress.read_bytes / elapsed_seconds) - + " bytes/sec., minimum: " + toString(limits.min_execution_speed_bytes), - ErrorCodes::TOO_SLOW); - - /// If the predicted execution time is longer than `max_execution_time`. - if (limits.max_execution_time != 0 && total_rows && progress.read_rows) - { - double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows) / progress.read_rows); - - if (estimated_execution_time_seconds > limits.max_execution_time.totalSeconds()) - throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)" - + " is too long. Maximum: " + toString(limits.max_execution_time.totalSeconds()) - + ". 
Estimated rows to process: " + toString(total_rows), - ErrorCodes::TOO_SLOW); - } - - if (limits.max_execution_speed && progress.read_rows / elapsed_seconds >= limits.max_execution_speed) - limitProgressingSpeed(progress.read_rows, limits.max_execution_speed, total_elapsed_microseconds); - - if (limits.max_execution_speed_bytes && progress.read_bytes / elapsed_seconds >= limits.max_execution_speed_bytes) - limitProgressingSpeed(progress.read_bytes, limits.max_execution_speed_bytes, total_elapsed_microseconds); - } - } + limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds); if (quota != nullptr && limits.mode == LIMITS_TOTAL) { diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index f33c4534a3f..7b40292690e 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -202,16 +203,9 @@ public: SizeLimits size_limits; - Poco::Timespan max_execution_time = 0; - OverflowMode timeout_overflow_mode = OverflowMode::THROW; + ExecutionSpeedLimits speed_limit; - /// in rows per second - size_t min_execution_speed = 0; - size_t max_execution_speed = 0; - size_t min_execution_speed_bytes = 0; - size_t max_execution_speed_bytes = 0; - /// Verify that the speed is not too low after the specified time has elapsed. - Poco::Timespan timeout_before_checking_execution_speed = 0; + OverflowMode timeout_overflow_mode = OverflowMode::THROW; }; /** Set limitations that checked on each block. */ diff --git a/dbms/src/DataStreams/SizeLimits.cpp b/dbms/src/DataStreams/SizeLimits.cpp index 63164552120..be0308b6edd 100644 --- a/dbms/src/DataStreams/SizeLimits.cpp +++ b/dbms/src/DataStreams/SizeLimits.cpp @@ -7,13 +7,13 @@ namespace DB { -bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const +bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int too_many_rows_exception_code, int too_many_bytes_exception_code) const { if (max_rows && rows > max_rows) { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max rows: " + formatReadableQuantity(max_rows) - + ", current rows: " + formatReadableQuantity(rows), exception_code); + + ", current rows: " + formatReadableQuantity(rows), too_many_rows_exception_code); else return false; } @@ -22,7 +22,7 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti { if (overflow_mode == OverflowMode::THROW) throw Exception("Limit for " + std::string(what) + " exceeded, max bytes: " + formatReadableSizeWithBinarySuffix(max_bytes) - + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), exception_code); + + ", current bytes: " + formatReadableSizeWithBinarySuffix(bytes), too_many_bytes_exception_code); else return false; } @@ -30,4 +30,9 @@ bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int excepti return true; } +bool SizeLimits::check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const +{ + return check(rows, bytes, what, exception_code, exception_code); +} + } diff --git a/dbms/src/DataStreams/SizeLimits.h b/dbms/src/DataStreams/SizeLimits.h index 41238087613..1bd673b1602 100644 --- a/dbms/src/DataStreams/SizeLimits.h +++ b/dbms/src/DataStreams/SizeLimits.h @@ -31,6 +31,7 @@ struct SizeLimits : max_rows(max_rows_), max_bytes(max_bytes_), overflow_mode(overflow_mode_) {} /// 
Check limits. If exceeded, return false or throw an exception, depending on overflow_mode. + bool check(UInt64 rows, UInt64 bytes, const char * what, int too_many_rows_exception_code, int too_many_bytes_exception_code) const; bool check(UInt64 rows, UInt64 bytes, const char * what, int exception_code) const; }; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 39a1976d2d4..68d91bdba30 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1603,7 +1603,7 @@ void InterpreterSelectQuery::executeFetchColumns( IBlockInputStream::LocalLimits limits; limits.mode = IBlockInputStream::LIMITS_TOTAL; limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode); - limits.max_execution_time = settings.max_execution_time; + limits.speed_limit.max_execution_time = settings.max_execution_time; limits.timeout_overflow_mode = settings.timeout_overflow_mode; /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers, @@ -1615,11 +1615,11 @@ void InterpreterSelectQuery::executeFetchColumns( */ if (options.to_stage == QueryProcessingStage::Complete) { - limits.min_execution_speed = settings.min_execution_speed; - limits.max_execution_speed = settings.max_execution_speed; - limits.min_execution_speed_bytes = settings.min_execution_speed_bytes; - limits.max_execution_speed_bytes = settings.max_execution_speed_bytes; - limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; + limits.speed_limit.min_execution_speed = settings.min_execution_speed; + limits.speed_limit.max_execution_speed = settings.max_execution_speed; + limits.speed_limit.min_execution_speed_bytes = settings.min_execution_speed_bytes; + limits.speed_limit.max_execution_speed_bytes = settings.max_execution_speed_bytes; + limits.speed_limit.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; } QuotaForIntervals & quota = context.getQuota(); diff --git a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp index 5eee08efcfc..094181d9cdb 100644 --- a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp +++ b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp @@ -80,11 +80,11 @@ void LimitsCheckingTransform::transform(Chunk & chunk) bool LimitsCheckingTransform::checkTimeLimit() { - if (limits.max_execution_time != 0 - && info.total_stopwatch.elapsed() > static_cast(limits.max_execution_time.totalMicroseconds()) * 1000) + if (limits.speed_limit.max_execution_time != 0 + && info.total_stopwatch.elapsed() > static_cast(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000) return handleOverflowMode(limits.timeout_overflow_mode, "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds()) - + " seconds, maximum: " + toString(limits.max_execution_time.totalMicroseconds() / 1000000.0), + + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0), ErrorCodes::TIMEOUT_EXCEEDED); return true; diff --git a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp index 29adb061e29..3bea5bc53a9 100644 --- a/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/dbms/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -51,7 +51,7 @@ void 
KafkaBlockInputStream::readPrefixImpl() const auto & limits_ = getLimits(); const size_t poll_timeout = buffer->pollTimeout(); - size_t rows_portion_size = poll_timeout ? std::min(max_block_size, limits_.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; + size_t rows_portion_size = poll_timeout ? std::min(max_block_size, limits_.speed_limit.max_execution_time.totalMilliseconds() / poll_timeout) : max_block_size; rows_portion_size = std::max(rows_portion_size, 1ul); auto non_virtual_header = storage.getSampleBlockNonMaterialized(); /// FIXME: add materialized columns support diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index ed067993a18..ae9b2527d9a 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -387,7 +387,7 @@ bool StorageKafka::streamToViews() // Limit read batch to maximum block size to allow DDL IBlockInputStream::LocalLimits limits; - limits.max_execution_time = settings.stream_flush_interval_ms; + limits.speed_limit.max_execution_time = settings.stream_flush_interval_ms; limits.timeout_overflow_mode = OverflowMode::BREAK; stream->setLimits(limits); } From 23069ca6d0ca5cebfb42e1bd3f0fca37853f0c92 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 4 Oct 2019 18:40:05 +0300 Subject: [PATCH 016/222] Progress for MergeTreeSelectProcessor. --- dbms/src/DataStreams/IBlockInputStream.h | 10 +-- dbms/src/Interpreters/ProcessList.cpp | 2 + .../src/Processors/Executors/TreeExecutor.cpp | 43 ++++++++++- dbms/src/Processors/Executors/TreeExecutor.h | 13 ++++ .../Sources/SourceFromInputStream.cpp | 2 +- .../Sources/SourceFromInputStream.h | 11 ++- .../Processors/Sources/SourceWithProgress.cpp | 69 +++++++++++++++++ .../Processors/Sources/SourceWithProgress.h | 75 +++++++++++++++++++ .../MergeTreeBaseSelectProcessor.cpp | 6 +- .../MergeTree/MergeTreeBaseSelectProcessor.h | 8 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 9 ++- .../MergeTreeReverseSelectProcessor.cpp | 10 +-- .../MergeTreeReverseSelectProcessor.h | 2 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 9 +-- .../MergeTree/MergeTreeSelectProcessor.h | 2 +- 15 files changed, 234 insertions(+), 37 deletions(-) create mode 100644 dbms/src/Processors/Sources/SourceWithProgress.cpp create mode 100644 dbms/src/Processors/Sources/SourceWithProgress.h diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h index 7b40292690e..3bfdb614fbc 100644 --- a/dbms/src/DataStreams/IBlockInputStream.h +++ b/dbms/src/DataStreams/IBlockInputStream.h @@ -139,7 +139,7 @@ public: * The function takes the number of rows in the last block, the number of bytes in the last block. * Note that the callback can be called from different threads. */ - void setProgressCallback(const ProgressCallback & callback); + virtual void setProgressCallback(const ProgressCallback & callback); /** In this method: @@ -164,11 +164,11 @@ public: * Based on this information, the quota and some restrictions will be checked. * This information will also be available in the SHOW PROCESSLIST request. */ - void setProcessListElement(QueryStatus * elem); + virtual void setProcessListElement(QueryStatus * elem); /** Set the approximate total number of rows to read. */ - void addTotalRowsApprox(size_t value) { total_rows_approx += value; } + virtual void addTotalRowsApprox(size_t value) { total_rows_approx += value; } /** Ask to abort the receipt of data as soon as possible. 
@@ -209,7 +209,7 @@ public: }; /** Set limitations that checked on each block. */ - void setLimits(const LocalLimits & limits_) + virtual void setLimits(const LocalLimits & limits_) { limits = limits_; } @@ -222,7 +222,7 @@ public: /** Set the quota. If you set a quota on the amount of raw data, * then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits. */ - void setQuota(QuotaForIntervals & quota_) + virtual void setQuota(QuotaForIntervals & quota_) { quota = "a_; } diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index 100ecc00dc1..3f7eca86930 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -28,6 +28,8 @@ namespace ErrorCodes extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int QUERY_WITH_SAME_ID_IS_ALREADY_RUNNING; extern const int LOGICAL_ERROR; + extern const int TOO_MANY_ROWS; + extern const int TOO_MANY_BYTES; } diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index 593d455e672..469b1c36eb2 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -1,4 +1,5 @@ #include +#include #include namespace DB @@ -13,7 +14,7 @@ static void checkProcessorHasSingleOutput(IProcessor * processor) ErrorCodes::LOGICAL_ERROR); } -static void validateTree(const Processors & processors, IProcessor * root) +static void validateTree(const Processors & processors, IProcessor * root, std::vector & sources) { std::unordered_map index; @@ -56,6 +57,13 @@ static void validateTree(const Processors & processors, IProcessor * root) auto & children = node->getInputs(); for (auto & child : children) stack.push(&child.getOutputPort().getProcessor()); + + /// Fill sources array. + if (children.empty()) + { + if (auto * source = dynamic_cast(node)) + sources.push_back(source); + } } for (size_t i = 0; i < is_visited.size(); ++i) @@ -71,7 +79,7 @@ void TreeExecutor::init() root = processors.back().get(); - validateTree(processors, root); + validateTree(processors, root, sources_with_progress); port = std::make_unique(getHeader(), root); connect(root->getOutputs().front(), *port); @@ -170,4 +178,35 @@ Block TreeExecutor::readImpl() } } +void TreeExecutor::setProgressCallback(const ProgressCallback & callback) +{ + for (auto & source : sources_with_progress) + source->setProgressCallback(callback); +} + +void TreeExecutor::setProcessListElement(QueryStatus * elem) +{ + for (auto & source : sources_with_progress) + source->setProcessListElement(elem); +} + +void TreeExecutor::setLimits(const IBlockInputStream::LocalLimits & limits_) +{ + for (auto & source : sources_with_progress) + source->setLimits(limits_); +} + +void TreeExecutor::setQuota(QuotaForIntervals & quota_) +{ + for (auto & source : sources_with_progress) + source->setQuota(quota_); +} + +void TreeExecutor::addTotalRowsApprox(size_t value) +{ + /// Add only for one source. 
+    if (!sources_with_progress.empty())
+        sources_with_progress.front()->addTotalRowsApprox(value);
+}
+
 }
diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h
index 0aad5b3024a..4af989240c2 100644
--- a/dbms/src/Processors/Executors/TreeExecutor.h
+++ b/dbms/src/Processors/Executors/TreeExecutor.h
@@ -5,6 +5,8 @@
 namespace DB
 {
 
+class ISourceWithProgress;
+
 class TreeExecutor : public IBlockInputStream
 {
 public:
@@ -13,6 +15,14 @@ public:
     String getName() const override { return root->getName(); }
     Block getHeader() const override { return root->getOutputs().front().getHeader(); }
 
+    /// These methods do not affect TreeExecutor as an IBlockInputStream itself.
+    /// They are just passed on to all SourceWithProgress processors.
+    void setProgressCallback(const ProgressCallback & callback) final;
+    void setProcessListElement(QueryStatus * elem) final;
+    void setLimits(const LocalLimits & limits_) final;
+    void setQuota(QuotaForIntervals & quota_) final;
+    void addTotalRowsApprox(size_t value) final;
+
 protected:
     Block readImpl() override;
 
@@ -21,6 +31,9 @@ private:
     IProcessor * root = nullptr;
     std::unique_ptr<InputPort> port;
 
+    /// Remember sources that support progress.
+    std::vector<ISourceWithProgress *> sources_with_progress;
+
     void init();
     void execute();
 };
diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.cpp b/dbms/src/Processors/Sources/SourceFromInputStream.cpp
index b82130f5ebb..691a9785942 100644
--- a/dbms/src/Processors/Sources/SourceFromInputStream.cpp
+++ b/dbms/src/Processors/Sources/SourceFromInputStream.cpp
@@ -7,7 +7,7 @@ namespace DB
 {
 
 SourceFromInputStream::SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_)
-    : ISource(stream_->getHeader())
+    : ISourceWithProgress(stream_->getHeader())
     , force_add_aggregating_info(force_add_aggregating_info_)
     , stream(std::move(stream_))
 {
diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.h b/dbms/src/Processors/Sources/SourceFromInputStream.h
index 0e6c698f260..6f8a7fcd2d1 100644
--- a/dbms/src/Processors/Sources/SourceFromInputStream.h
+++ b/dbms/src/Processors/Sources/SourceFromInputStream.h
@@ -1,5 +1,5 @@
 #pragma once
-#include <Processors/ISource.h>
+#include <Processors/Sources/SourceWithProgress.h>
 
 namespace DB
 {
@@ -7,7 +7,7 @@ namespace DB
 class IBlockInputStream;
 using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
 
-class SourceFromInputStream : public ISource
+class SourceFromInputStream : public ISourceWithProgress
 {
 public:
     explicit SourceFromInputStream(BlockInputStreamPtr stream_, bool force_add_aggregating_info_ = false);
@@ -22,6 +22,13 @@ public:
 
     void addTotalsPort();
 
+    /// Implementation for methods from ISourceWithProgress.
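+    /// All of them simply delegate to the wrapped IBlockInputStream.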
+    void setLimits(const LocalLimits & limits_) final { stream->setLimits(limits_); }
+    void setQuota(QuotaForIntervals & quota_) final { stream->setQuota(quota_); }
+    void setProcessListElement(QueryStatus * elem) final { stream->setProcessListElement(elem); }
+    void setProgressCallback(const ProgressCallback & callback) final { stream->setProgressCallback(callback); }
+    void addTotalRowsApprox(size_t value) final { stream->addTotalRowsApprox(value); }
+
 private:
     bool has_aggregate_functions = false;
     bool force_add_aggregating_info;
diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp
new file mode 100644
index 00000000000..d3487faae3a
--- /dev/null
+++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp
@@ -0,0 +1,69 @@
+#include <Processors/Sources/SourceWithProgress.h>
+
+#include <Interpreters/Quota.h>
+#include <Interpreters/ProcessList.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int TOO_MANY_ROWS;
+    extern const int TOO_MANY_BYTES;
+}
+
+void SourceWithProgress::progress(const Progress & value)
+{
+    if (total_rows_approx != 0 && process_list_elem)
+    {
+        process_list_elem->updateProgressIn({0, 0, total_rows_approx});
+        total_rows_approx = 0;
+    }
+
+    if (progress_callback)
+        progress_callback(value);
+
+    if (process_list_elem)
+    {
+        if (!process_list_elem->updateProgressIn(value))
+            cancel();
+
+        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.
+
+        ProgressValues progress = process_list_elem->getProgressIn();
+        size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read);
+
+        /// Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
+        /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
+        if (limits.mode == LimitsMode::LIMITS_TOTAL)
+        {
+            if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read",
+                                          ErrorCodes::TOO_MANY_ROWS, ErrorCodes::TOO_MANY_BYTES))
+                cancel();
+        }
+
+        size_t total_rows = progress.total_rows_to_read;
+
+        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
+        UInt64 total_elapsed_microseconds = total_stopwatch.elapsedMicroseconds();
+
+        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
+        {
+            /// Should be done in PipelineExecutor.
+            /// It is here for compatibility with IBlockInputStream.
+            CurrentThread::updatePerformanceCounters();
+            last_profile_events_update_time = total_elapsed_microseconds;
+        }
+
+        /// Should be done in PipelineExecutor.
+        /// It is here for compatibility with IBlockInputStream.
+        limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+
+        if (quota != nullptr && limits.mode == LimitsMode::LIMITS_TOTAL)
+        {
+            quota->checkAndAddReadRowsBytes(time(nullptr), value.read_rows, value.read_bytes);
+        }
+    }
+}
+
+}
diff --git a/dbms/src/Processors/Sources/SourceWithProgress.h b/dbms/src/Processors/Sources/SourceWithProgress.h
new file mode 100644
index 00000000000..833e5eccb6f
--- /dev/null
+++ b/dbms/src/Processors/Sources/SourceWithProgress.h
@@ -0,0 +1,75 @@
+#pragma once
+#include <Processors/ISource.h>
+#include <Common/Stopwatch.h>
+#include <DataStreams/IBlockInputStream.h>
+
+namespace DB
+{
+
+/// Adds progress to ISource.
+/// This class takes care of limits, quotas, callback on progress and updating performance counters for current thread.
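+/// ISourceWithProgress only declares the interface; SourceWithProgress below is the
+/// stateful default implementation.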
+class ISourceWithProgress : public ISource
+{
+public:
+    using ISource::ISource;
+
+    using LocalLimits = IBlockInputStream::LocalLimits;
+    using LimitsMode = IBlockInputStream::LimitsMode;
+
+    /// Set limitations that are checked on each chunk.
+    virtual void setLimits(const LocalLimits & limits_) = 0;
+
+    /// Set the quota. If you set a quota on the amount of raw data,
+    /// then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits.
+    virtual void setQuota(QuotaForIntervals & quota_) = 0;
+
+    /// Set the pointer to the process list item.
+    /// General information about the resources spent on the query will be written into it.
+    /// Based on this information, the quota and some restrictions will be checked.
+    /// This information will also be available in the SHOW PROCESSLIST query.
+    virtual void setProcessListElement(QueryStatus * elem) = 0;
+
+    /// Set the execution progress bar callback.
+    /// It is called after each chunk.
+    /// The function takes the number of rows in the last chunk and the number of bytes in the last chunk.
+    /// Note that the callback can be called from different threads.
+    virtual void setProgressCallback(const ProgressCallback & callback) = 0;
+
+    /// Set the approximate total number of rows to read.
+    virtual void addTotalRowsApprox(size_t value) = 0;
+};
+
+/// Implementation for ISourceWithProgress.
+class SourceWithProgress : public ISourceWithProgress
+{
+public:
+    using ISourceWithProgress::ISourceWithProgress;
+
+    using LocalLimits = IBlockInputStream::LocalLimits;
+    using LimitsMode = IBlockInputStream::LimitsMode;
+
+    void setLimits(const LocalLimits & limits_) final { limits = limits_; }
+    void setQuota(QuotaForIntervals & quota_) final { quota = &quota_; }
+    void setProcessListElement(QueryStatus * elem) final { process_list_elem = elem; }
+    void setProgressCallback(const ProgressCallback & callback) final { progress_callback = callback; }
+    void addTotalRowsApprox(size_t value) final { total_rows_approx += value; }
+
+protected:
+    /// Call this method to provide information about progress.
+    void progress(const Progress & value);
+
+private:
+    LocalLimits limits;
+    QuotaForIntervals * quota = nullptr;
+    ProgressCallback progress_callback;
+    QueryStatus * process_list_elem = nullptr;
+
+    /// The approximate total number of rows to read. For the progress bar.
+    size_t total_rows_approx = 0;
+
+    Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE};    /// Time with waiting time.
+    /// According to total_stopwatch in microseconds.
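+    /// Updated from progress() at most once per 10 ms (profile_events_update_period_microseconds).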
+    UInt64 last_profile_events_update_time = 0;
+};
+
+}
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
index 77f33dce01d..17c5e4609c7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp
@@ -3,9 +3,7 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 
@@ -32,7 +30,7 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor(
     bool save_marks_in_cache_,
     const Names & virt_column_names_)
     :
-    ISource(getHeader(std::move(header), prewhere_info_, virt_column_names_)),
+    SourceWithProgress(getHeader(std::move(header), prewhere_info_, virt_column_names_)),
     storage(storage_),
     prewhere_info(prewhere_info_),
     max_block_size_rows(max_block_size_rows_),
@@ -176,7 +174,7 @@ Chunk MergeTreeBaseSelectProcessor::readFromPartImpl()
 
     UInt64 num_filtered_rows = read_result.numReadRows() - read_result.num_rows;
 
-    /// TODO: progressImpl({ read_result.numReadRows(), read_result.numBytesRead() });
+    progress({ read_result.numReadRows(), read_result.numBytesRead() });
 
     if (task->size_predictor)
     {
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
index db369a5f267..22692271e58 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h
@@ -5,7 +5,7 @@
 #include
 #include
 
-#include <Processors/ISource.h>
+#include <Processors/Sources/SourceWithProgress.h>
 
 namespace DB
 {
@@ -16,7 +16,7 @@ class MarkCache;
 
 /// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream
-class MergeTreeBaseSelectProcessor : public ISource
+class MergeTreeBaseSelectProcessor : public SourceWithProgress
 {
 public:
     MergeTreeBaseSelectProcessor(
@@ -39,7 +39,7 @@ public:
 protected:
     Chunk generate() final;
 
-    /// Creates new this->task, and initilizes readers
+    /// Creates new this->task, and initializes readers.
     virtual bool getNewTask() = 0;
     virtual Chunk readFromPart();
 
@@ -52,8 +52,6 @@ protected:
 
     void initializeRangeReaders(MergeTreeReadTask & task);
 
-    size_t estimateNumRows(MergeTreeReadTask & current_task, MergeTreeRangeReader & current_reader);
-
 protected:
     const MergeTreeData & storage;
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
index 7df4178f58f..5d5f0057121 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@@ -749,17 +749,18 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams(
 
         for (size_t i = 0; i < num_streams; ++i)
         {
-            res.push_back({std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
+            auto source = std::make_shared<MergeTreeThreadSelectBlockInputProcessor>(
                 i, pool, min_marks_for_concurrent_read, max_block_size,
                 settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes,
                 data, use_uncompressed_cache,
-                query_info.prewhere_info, settings, virt_columns)});
+                query_info.prewhere_info, settings, virt_columns);
 
             if (i == 0)
             {
                 /// Set the approximate number of rows for the first source only
-                /// TODO
-                /// res.front()->addTotalRowsApprox(total_rows);
+                source->addTotalRowsApprox(total_rows);
             }
+
+            res.push_back({std::move(source)});
         }
     }
     else if (sum_marks > 0)
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
index ff8d599135b..af8c02318d7 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp
@@ -1,7 +1,6 @@
 #include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
 #include
 #include
-#include
 
 namespace DB
 
@@ -39,7 +38,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
     size_t preferred_block_size_bytes_,
     size_t preferred_max_column_in_block_size_bytes_,
     Names required_columns_,
-    const MarkRanges & mark_ranges_,
+    MarkRanges mark_ranges_,
     bool use_uncompressed_cache_,
     const PrewhereInfoPtr & prewhere_info_,
     bool check_columns,
@@ -55,10 +54,10 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
         storage_, prewhere_info_, max_block_size_rows_,
         preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, min_bytes_to_use_direct_io_,
         max_read_buffer_size_, use_uncompressed_cache_, save_marks_in_cache_, virt_column_names_},
-    required_columns{required_columns_},
+    required_columns{std::move(required_columns_)},
     data_part{owned_data_part_},
     part_columns_lock(data_part->columns_lock),
-    all_mark_ranges(mark_ranges_),
+    all_mark_ranges(std::move(mark_ranges_)),
     part_index_in_query(part_index_in_query_),
     path(data_part->getFullPath())
 {
@@ -76,8 +75,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
         : "")
         << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
 
-    /// TODO
-    /// addTotalRowsApprox(total_rows);
+    addTotalRowsApprox(total_rows);
 
     ordered_names = header_without_virtual_columns.getNames();
 
diff --git a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
index dcba0ca5e36..58202988e4c 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h
@@ -23,7 +23,7 @@ public:
         size_t preferred_block_size_bytes,
         size_t preferred_max_column_in_block_size_bytes,
         Names column_names,
-        const MarkRanges & mark_ranges,
+        MarkRanges mark_ranges,
         bool
use_uncompressed_cache, const PrewhereInfoPtr & prewhere_info, bool check_columns, diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 04954d6ff82..51ed337367d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -39,7 +38,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, Names required_columns_, - const MarkRanges & mark_ranges_, + MarkRanges mark_ranges_, bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, bool check_columns_, @@ -58,7 +57,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( required_columns{std::move(required_columns_)}, data_part{owned_data_part_}, part_columns_lock(data_part->columns_lock), - all_mark_ranges(mark_ranges_), + all_mark_ranges(std::move(mark_ranges_)), part_index_in_query(part_index_in_query_), check_columns(check_columns_), path(data_part->getFullPath()) @@ -77,9 +76,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( : "") << " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin)); - /// TODO - /// addTotalRowsApprox(total_rows); - + addTotalRowsApprox(total_rows); ordered_names = header_without_virtual_columns.getNames(); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 0551d966481..c0d93842a81 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -23,7 +23,7 @@ public: size_t preferred_block_size_bytes, size_t preferred_max_column_in_block_size_bytes, Names column_names_, - const MarkRanges & mark_ranges, + MarkRanges mark_ranges, bool use_uncompressed_cache, const PrewhereInfoPtr & prewhere_info, bool check_columns, From d4f11af8175c15df419724184e27266c3f8b3413 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 4 Oct 2019 20:46:48 +0300 Subject: [PATCH 017/222] Update QueryPipeline. 
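Route progress and process-list wiring through the new ISourceWithProgress interface instead of enumerating concrete source classes. A hypothetical helper sketch of the dispatch (illustrative only, not part of this diff):

    static void setProgressCallbackForSources(Processors & processors, const ProgressCallback & callback)
    {
        for (auto & processor : processors)
            if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
                source->setProgressCallback(callback); /// covers SourceFromInputStream, MergeTree sources, etc.
    }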
--- dbms/src/Processors/QueryPipeline.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp
index 6fbc3bb8ff1..06fed2ac3fc 100644
--- a/dbms/src/Processors/QueryPipeline.cpp
+++ b/dbms/src/Processors/QueryPipeline.cpp
@@ -515,8 +515,8 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback)
 {
     for (auto & processor : processors)
     {
-        if (auto * source = typeid_cast<SourceFromInputStream *>(processor.get()))
-            source->getStream().setProgressCallback(callback);
+        if (auto * source = typeid_cast<ISourceWithProgress *>(processor.get()))
+            source->setProgressCallback(callback);
 
         if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
             source->setProgressCallback(callback);
@@ -527,8 +527,8 @@ void QueryPipeline::setProcessListElement(QueryStatus * elem)
 {
     for (auto & processor : processors)
     {
-        if (auto * source = typeid_cast<SourceFromInputStream *>(processor.get()))
-            source->getStream().setProcessListElement(elem);
+        if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
+            source->setProcessListElement(elem);
 
         if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
             source->setProcessListElement(elem);
 
From c7bb83262ecfa48c72f04ccd1cd048d75dedb3af Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 9 Oct 2019 12:33:16 +0300
Subject: [PATCH 018/222] Fix progress callback for processors pipeline.

---
 dbms/src/Processors/QueryPipeline.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Processors/QueryPipeline.cpp b/dbms/src/Processors/QueryPipeline.cpp
index 06fed2ac3fc..445e12a2a2d 100644
--- a/dbms/src/Processors/QueryPipeline.cpp
+++ b/dbms/src/Processors/QueryPipeline.cpp
@@ -515,7 +515,7 @@ void QueryPipeline::setProgressCallback(const ProgressCallback & callback)
 {
     for (auto & processor : processors)
     {
-        if (auto * source = typeid_cast<ISourceWithProgress *>(processor.get()))
+        if (auto * source = dynamic_cast<ISourceWithProgress *>(processor.get()))
             source->setProgressCallback(callback);
 
         if (auto * source = typeid_cast<CreatingSetsTransform *>(processor.get()))
             source->setProgressCallback(callback);
 
From ea27918de87ce0d29bc4778583b591b76f6ff5e3 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 9 Oct 2019 12:40:30 +0300
Subject: [PATCH 019/222] Try fix progressbar.

---
 dbms/src/Processors/Sources/SourceWithProgress.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp
index d3487faae3a..1e63003acef 100644
--- a/dbms/src/Processors/Sources/SourceWithProgress.cpp
+++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp
@@ -16,7 +16,12 @@ void SourceWithProgress::progress(const Progress & value)
 {
     if (total_rows_approx != 0 && process_list_elem)
     {
-        process_list_elem->updateProgressIn({0, 0, total_rows_approx});
+        Progress total_rows_progress = {0, 0, total_rows_approx};
+
+        if (progress_callback)
+            progress_callback(total_rows_progress);
+
+        process_list_elem->updateProgressIn(total_rows_progress);
         total_rows_approx = 0;
     }
 
From eb2677c94ef1abf005a331172ded06d7b7a1882b Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 9 Oct 2019 12:44:24 +0300
Subject: [PATCH 020/222] Try fix progressbar.
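Patch 019 still flushed the one-shot total-rows estimate only when a process-list element was attached; reorder the checks so the progress callback always receives it. The intended shape of the guard (same logic as the hunk below, shown flat for readability):

    if (total_rows_approx != 0)
    {
        Progress total_rows_progress = {0, 0, total_rows_approx};

        if (progress_callback)
            progress_callback(total_rows_progress); /// now fires even without a process-list element

        if (process_list_elem)
            process_list_elem->updateProgressIn(total_rows_progress);

        total_rows_approx = 0;
    }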
--- dbms/src/Processors/Sources/SourceWithProgress.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp index 1e63003acef..57bf6f2dca0 100644 --- a/dbms/src/Processors/Sources/SourceWithProgress.cpp +++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp @@ -14,14 +14,16 @@ namespace ErrorCodes void SourceWithProgress::progress(const Progress & value) { - if (total_rows_approx != 0 && process_list_elem) + if (total_rows_approx != 0) { Progress total_rows_progress = {0, 0, total_rows_approx}; if (progress_callback) progress_callback(total_rows_progress); - process_list_elem->updateProgressIn(total_rows_progress); + if (process_list_elem) + process_list_elem->updateProgressIn(total_rows_progress); + total_rows_approx = 0; } From dea89cfc11c79150b073302735450a3d47ffbae4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 9 Oct 2019 19:42:13 +0300 Subject: [PATCH 021/222] Disable processors by default. --- dbms/src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 8f2474982a0..1d2cb2e6416 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -359,7 +359,7 @@ struct Settings : public SettingsCollection M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \ M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \ \ - M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \ + M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \ \ M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \ M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \ From 4728bdfccd57517e692fb2cbe6da204d4e3e3e81 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 10 Oct 2019 14:20:25 +0300 Subject: [PATCH 022/222] Fix MergeTreeSequentialBlockInputStream. --- .../MergeTree/MergeTreeSequentialBlockInputStream.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp index eeeb07f1c26..7b5ca701287 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp @@ -107,12 +107,16 @@ try if (should_evaluate_missing_defaults) reader->evaluateMissingDefaults({}, columns); + res = header.cloneEmpty(); + /// Reorder columns and fill result block. 
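+    /// ('sample' lists the physically read columns, whose order may differ from the header's, hence the lookup by name below.)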
 size_t num_columns = sample.size();
 auto it = sample.begin();
 for (size_t i = 0; i < num_columns; ++i)
 {
-    res.insert({columns[i], it->type, it->name});
+    if (header.has(it->name))
+        header.getByName(it->name).column = std::move(columns[i]);
+
     ++it;
 }
 
From 378052743d04ed71b7a485d948c69ab4f0f3a8aa Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 10 Oct 2019 14:24:29 +0300
Subject: [PATCH 023/222] Fix MergeTreeSequentialBlockInputStream.

---
 .../MergeTree/MergeTreeSequentialBlockInputStream.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp
index 7b5ca701287..081ad289d28 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeSequentialBlockInputStream.cpp
@@ -114,8 +114,8 @@ try
     auto it = sample.begin();
     for (size_t i = 0; i < num_columns; ++i)
     {
-        if (header.has(it->name))
-            header.getByName(it->name).column = std::move(columns[i]);
+        if (res.has(it->name))
+            res.getByName(it->name).column = std::move(columns[i]);
 
         ++it;
     }
 
From ef14df4632450a431d18b7201b9588d5045a1574 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 10 Oct 2019 17:16:15 +0300
Subject: [PATCH 024/222] Added more comments.

---
 dbms/src/DataStreams/ExecutionSpeedLimits.h          |  1 +
 dbms/src/DataStreams/IBlockInputStream.cpp           |  8 ++++----
 dbms/src/DataStreams/IBlockInputStream.h             |  2 +-
 dbms/src/Interpreters/InterpreterSelectQuery.cpp     | 12 ++++++------
 dbms/src/Processors/Executors/TreeExecutor.cpp       |  2 ++
 dbms/src/Processors/Executors/TreeExecutor.h         |  9 +++++++++
 dbms/src/Processors/Sources/SourceFromInputStream.h  |  1 +
 dbms/src/Processors/Sources/SourceWithProgress.cpp   | 12 +++++++++---
 .../Transforms/LimitsCheckingTransform.cpp           |  6 +++---
 dbms/src/Processors/Transforms/ReverseTransform.h    |  1 +
 dbms/src/Storages/IStorage.h                         |  4 ++++
 dbms/src/Storages/Kafka/StorageKafka.cpp             |  2 +-
 .../MergeTree/MergeTreeBaseSelectProcessor.h         |  5 ++++-
 .../MergeTreeThreadSelectBlockInputProcessor.cpp     | 11 ++++++-----
 14 files changed, 52 insertions(+), 24 deletions(-)

diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.h b/dbms/src/DataStreams/ExecutionSpeedLimits.h
index 67627cb36bf..6dbc2e5c687 100644
--- a/dbms/src/DataStreams/ExecutionSpeedLimits.h
+++ b/dbms/src/DataStreams/ExecutionSpeedLimits.h
@@ -20,6 +20,7 @@ public:
     /// Verify that the speed is not too low after the specified time has elapsed.
     Poco::Timespan timeout_before_checking_execution_speed = 0;
 
+    /// Pause execution if speed limits are exceeded.
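+    /// Throws TOO_SLOW when the query is below the minimum speed; sleeps when it is above the maximum.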
     void throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds);
 };
 
diff --git a/dbms/src/DataStreams/IBlockInputStream.cpp b/dbms/src/DataStreams/IBlockInputStream.cpp
index 447b595d438..2e30749e89f 100644
--- a/dbms/src/DataStreams/IBlockInputStream.cpp
+++ b/dbms/src/DataStreams/IBlockInputStream.cpp
@@ -219,11 +219,11 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
 
 bool IBlockInputStream::checkTimeLimit()
 {
-    if (limits.speed_limit.max_execution_time != 0
-        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000)
+    if (limits.speed_limits.max_execution_time != 0
+        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limits.max_execution_time.totalMicroseconds()) * 1000)
         return handleOverflowMode(limits.timeout_overflow_mode,
             "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds())
-            + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0),
+            + " seconds, maximum: " + toString(limits.speed_limits.max_execution_time.totalMicroseconds() / 1000000.0),
             ErrorCodes::TIMEOUT_EXCEEDED);
 
     return true;
@@ -289,7 +289,7 @@ void IBlockInputStream::progressImpl(const Progress & value)
             last_profile_events_update_time = total_elapsed_microseconds;
         }
 
-        limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+        limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
 
         if (quota != nullptr && limits.mode == LIMITS_TOTAL)
         {
diff --git a/dbms/src/DataStreams/IBlockInputStream.h b/dbms/src/DataStreams/IBlockInputStream.h
index 059e73f6db9..4f945001686 100644
--- a/dbms/src/DataStreams/IBlockInputStream.h
+++ b/dbms/src/DataStreams/IBlockInputStream.h
@@ -202,7 +202,7 @@ public:
 
         SizeLimits size_limits;
 
-        ExecutionSpeedLimits speed_limit;
+        ExecutionSpeedLimits speed_limits;
 
         OverflowMode timeout_overflow_mode = OverflowMode::THROW;
     };
 
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index ac73d888116..ff67bc170e9 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1587,7 +1587,7 @@ void InterpreterSelectQuery::executeFetchColumns(
         IBlockInputStream::LocalLimits limits;
         limits.mode = IBlockInputStream::LIMITS_TOTAL;
         limits.size_limits = SizeLimits(settings.max_rows_to_read, settings.max_bytes_to_read, settings.read_overflow_mode);
-        limits.speed_limit.max_execution_time = settings.max_execution_time;
+        limits.speed_limits.max_execution_time = settings.max_execution_time;
         limits.timeout_overflow_mode = settings.timeout_overflow_mode;
 
        /** Quota and minimal speed restrictions are checked on the initiating server of the request, and not on remote servers,
@@ -1599,11 +1599,11 @@
         */
        if (options.to_stage == QueryProcessingStage::Complete)
        {
-            limits.speed_limit.min_execution_speed = settings.min_execution_speed;
-            limits.speed_limit.max_execution_speed = settings.max_execution_speed;
-            limits.speed_limit.min_execution_speed_bytes = settings.min_execution_speed_bytes;
-            limits.speed_limit.max_execution_speed_bytes = settings.max_execution_speed_bytes;
-            limits.speed_limit.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
+            limits.speed_limits.min_execution_speed = settings.min_execution_speed;
+            limits.speed_limits.max_execution_speed = settings.max_execution_speed;
+            limits.speed_limits.min_execution_speed_bytes = settings.min_execution_speed_bytes;
+            limits.speed_limits.max_execution_speed_bytes = settings.max_execution_speed_bytes;
+            limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
        }
 
        QuotaForIntervals & quota = context.getQuota();
 
diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp
index 469b1c36eb2..94e2dfe5b5a 100644
--- a/dbms/src/Processors/Executors/TreeExecutor.cpp
+++ b/dbms/src/Processors/Executors/TreeExecutor.cpp
@@ -14,6 +14,8 @@ static void checkProcessorHasSingleOutput(IProcessor * processor)
             ErrorCodes::LOGICAL_ERROR);
 }
 
+/// Check tree invariants (described in TreeExecutor.h).
+/// Collect sources with progress.
 static void validateTree(const Processors & processors, IProcessor * root, std::vector<ISourceWithProgress *> & sources)
 {
     std::unordered_map<IProcessor *, size_t> index;
 
diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h
index 4af989240c2..51fc82200b8 100644
--- a/dbms/src/Processors/Executors/TreeExecutor.h
+++ b/dbms/src/Processors/Executors/TreeExecutor.h
@@ -7,9 +7,17 @@ namespace DB
 
 class ISourceWithProgress;
 
+/// It's a wrapper that turns a tree-shaped pipeline of processors into a block input stream.
+/// Executes all processors in a single thread, by an in-order tree traversal.
+/// Also supports progress and quotas.
 class TreeExecutor : public IBlockInputStream
 {
 public:
+    /// The last processor in the list must be the tree root.
+    /// It is checked that
+    ///  * processors form a tree
+    ///  * all processors are attainable from the root
+    ///  * there are no other connected processors
     explicit TreeExecutor(Processors processors_) : processors(std::move(processors_)) { init(); }
 
     String getName() const override { return root->getName(); }
@@ -35,6 +43,7 @@ private:
     std::vector<ISourceWithProgress *> sources_with_progress;
 
     void init();
+    /// Execute the tree step by step until the root returns the next chunk or execution is finished.
     void execute();
 };
 
diff --git a/dbms/src/Processors/Sources/SourceFromInputStream.h b/dbms/src/Processors/Sources/SourceFromInputStream.h
index 6f8a7fcd2d1..888439f15d5 100644
--- a/dbms/src/Processors/Sources/SourceFromInputStream.h
+++ b/dbms/src/Processors/Sources/SourceFromInputStream.h
@@ -7,6 +7,7 @@ namespace DB
 class IBlockInputStream;
 using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
 
+/// Wrapper for IBlockInputStream which implements ISourceWithProgress.
 class SourceFromInputStream : public ISourceWithProgress
 {
 public:
diff --git a/dbms/src/Processors/Sources/SourceWithProgress.cpp b/dbms/src/Processors/Sources/SourceWithProgress.cpp
index 57bf6f2dca0..21f9d5ca9bb 100644
--- a/dbms/src/Processors/Sources/SourceWithProgress.cpp
+++ b/dbms/src/Processors/Sources/SourceWithProgress.cpp
@@ -12,6 +12,8 @@ namespace ErrorCodes
     extern const int TOO_MANY_BYTES;
 }
 
+/// Aggregated copy-paste from IBlockInputStream::progressImpl.
+/// Most of this must be done in PipelineExecutor outside. Now it's done for compatibility with IBlockInputStream.
 void SourceWithProgress::progress(const Progress & value)
 {
     if (total_rows_approx != 0)
@@ -35,13 +37,17 @@ void SourceWithProgress::progress(const Progress & value)
         if (!process_list_elem->updateProgressIn(value))
             cancel();
 
-        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.
+        /// The total amount of data processed or intended for processing in all sources, possibly on remote servers.
 
         ProgressValues progress = process_list_elem->getProgressIn();
         size_t total_rows_estimate = std::max(progress.read_rows, progress.total_rows_to_read);
 
-        /// Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
+        /// Check the restrictions on the
+        ///  * amount of data to read
+        ///  * speed of the query
+        ///  * quota on the amount of data to read
         /// NOTE: Maybe it makes sense to have them checked directly in ProcessList?
+
         if (limits.mode == LimitsMode::LIMITS_TOTAL)
         {
             if (!limits.size_limits.check(total_rows_estimate, progress.read_bytes, "rows to read",
@@ -64,7 +70,7 @@
 
         /// Should be done in PipelineExecutor.
         /// It is here for compatibility with IBlockInputStream.
-        limits.speed_limit.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
+        limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_elapsed_microseconds);
 
         if (quota != nullptr && limits.mode == LimitsMode::LIMITS_TOTAL)
         {
diff --git a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp
index 094181d9cdb..4947d11974b 100644
--- a/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp
+++ b/dbms/src/Processors/Transforms/LimitsCheckingTransform.cpp
@@ -80,11 +80,11 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
 
 bool LimitsCheckingTransform::checkTimeLimit()
 {
-    if (limits.speed_limit.max_execution_time != 0
-        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limit.max_execution_time.totalMicroseconds()) * 1000)
+    if (limits.speed_limits.max_execution_time != 0
+        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.speed_limits.max_execution_time.totalMicroseconds()) * 1000)
         return handleOverflowMode(limits.timeout_overflow_mode,
             "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds())
-            + " seconds, maximum: " + toString(limits.speed_limit.max_execution_time.totalMicroseconds() / 1000000.0),
+            + " seconds, maximum: " + toString(limits.speed_limits.max_execution_time.totalMicroseconds() / 1000000.0),
             ErrorCodes::TIMEOUT_EXCEEDED);
 
     return true;
diff --git a/dbms/src/Processors/Transforms/ReverseTransform.h b/dbms/src/Processors/Transforms/ReverseTransform.h
index 2e3eca25648..6450fbbae47 100644
--- a/dbms/src/Processors/Transforms/ReverseTransform.h
+++ b/dbms/src/Processors/Transforms/ReverseTransform.h
@@ -4,6 +4,7 @@
 namespace DB
 {
 
+/// Reverse rows in chunk.
 class ReverseTransform : public ISimpleTransform
 {
 public:
diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h
index 3bd494fdb4a..b224f84be97 100644
--- a/dbms/src/Storages/IStorage.h
+++ b/dbms/src/Storages/IStorage.h
@@ -238,6 +238,8 @@ public:
       * if the storage can return a different number of streams.
       *
       * It is guaranteed that the structure of the table will not change over the lifetime of the returned streams (that is, there will not be ALTER, RENAME and DROP).
+      *
+      * Default implementation calls `readWithProcessors` and wraps into TreeExecutor.
       */
     virtual BlockInputStreams read(
         const Names & /*column_names*/,
@@ -247,6 +249,8 @@
         size_t /*max_block_size*/,
         unsigned /*num_streams*/);
 
+    /** The same as read, but returns processors.
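+      * Processors can be executed in parallel by a PipelineExecutor, or wrapped back into streams with TreeExecutor (as the default read() does).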
+ */ virtual Pipes readWithProcessors( const Names & /*column_names*/, const SelectQueryInfo & /*query_info*/, diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index 46b6ac7e2d6..c0109f337c9 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -388,7 +388,7 @@ bool StorageKafka::streamToViews() // Limit read batch to maximum block size to allow DDL IBlockInputStream::LocalLimits limits; - limits.speed_limit.max_execution_time = settings.stream_flush_interval_ms; + limits.speed_limits.max_execution_time = settings.stream_flush_interval_ms; limits.timeout_overflow_mode = OverflowMode::BREAK; stream->setLimits(limits); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 22692271e58..7f3367b74c8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -15,7 +15,7 @@ class UncompressedCache; class MarkCache; -/// Base class for MergeTreeThreadSelectBlockInputStream and MergeTreeSelectBlockInputStream +/// Base class for MergeTreeThreadSelectProcessor and MergeTreeSelectProcessor class MergeTreeBaseSelectProcessor : public SourceWithProgress { public: @@ -46,8 +46,10 @@ protected: Chunk readFromPartImpl(); + /// Two versions for header and chunk. static void injectVirtualColumns(Block & block, MergeTreeReadTask * task, const Names & virtual_columns); static void injectVirtualColumns(Chunk & chunk, MergeTreeReadTask * task, const Names & virtual_columns); + static Block getHeader(Block block, const PrewhereInfoPtr & prewhere_info, const Names & virtual_columns); void initializeRangeReaders(MergeTreeReadTask & task); @@ -68,6 +70,7 @@ protected: bool save_marks_in_cache; Names virt_column_names; + /// This header is used for chunks from readFromPart(). Block header_without_virtual_columns; std::unique_ptr task; diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index 78122c53ac1..cc090833f1e 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -20,11 +20,12 @@ MergeTreeThreadSelectBlockInputProcessor::MergeTreeThreadSelectBlockInputProcess const Settings & settings, const Names & virt_column_names_) : - MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, - preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, use_uncompressed_cache_, true, virt_column_names_}, - thread{thread_}, - pool{pool_} + MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, + preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, + settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, + use_uncompressed_cache_, true, virt_column_names_}, + thread{thread_}, + pool{pool_} { /// round min_marks_to_read up to nearest multiple of block_size expressed in marks /// If granularity is adaptive it doesn't make sense From 89dfe7882d3fae32ee33434e7635f99f3afc4178 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 11 Oct 2019 11:55:00 +0300 Subject: [PATCH 025/222] Enable processors by default. 
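For reference, an approximate sketch of the call site that this setting gates (roughly as in executeQuery.cpp of this era; names are approximate and not part of the diff):

    bool use_processors = settings.experimental_use_processors && interpreter->canExecuteWithProcessors();

    if (use_processors)
        pipeline = interpreter->executeWithProcessors(); /// QueryPipeline path
    else
        res = interpreter->execute();                    /// classic IBlockInputStream path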
--- dbms/src/Core/Settings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 1d2cb2e6416..8f2474982a0 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -359,7 +359,7 @@ struct Settings : public SettingsCollection
     M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \
     M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
     \
-    M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
+    M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \
     \
     M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
     M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
 
From 7c2575542b63ddbb8fdabd6df1ecaf8d89183e45 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Sat, 12 Oct 2019 21:52:20 +0300
Subject: [PATCH 026/222] Fix build.

---
 dbms/src/Processors/Transforms/ReverseTransform.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dbms/src/Processors/Transforms/ReverseTransform.cpp b/dbms/src/Processors/Transforms/ReverseTransform.cpp
index eb2b39d26d1..e2fb66411aa 100644
--- a/dbms/src/Processors/Transforms/ReverseTransform.cpp
+++ b/dbms/src/Processors/Transforms/ReverseTransform.cpp
@@ -1,4 +1,5 @@
 #include <Processors/Transforms/ReverseTransform.h>
+#include
 
 namespace DB
 {
From 436e87a8edf6685bf52e3ee0af79578f2bcc6c5e Mon Sep 17 00:00:00 2001
From: FeehanG <51821376+FeehanG@users.noreply.github.com>
Date: Mon, 14 Oct 2019 14:03:23 +0300
Subject: [PATCH 027/222] Update parametric_functions.md

---
 .../agg_functions/parametric_functions.md | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md
index 13cbc2b05d8..a044f7d97be 100644
--- a/docs/en/query_language/agg_functions/parametric_functions.md
+++ b/docs/en/query_language/agg_functions/parametric_functions.md
@@ -73,7 +73,7 @@ In this case, you should remember that you don't know the histogram bin borders.
 
 ## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch}
 
-Checks whether the sequence contains the event chain that matches the pattern.
+Checks whether the sequence contains an event chain that matches the pattern.
 
 ```sql
 sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 ```
 
@@ -87,9 +87,9 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 
 - `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
 
-- `timestamp` — Column that considered to contain time data. Typical data types are `Date`, and `DateTime`. You can use also any of the supported [UInt](../../data_types/int_uint.md) data types.
+- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes into account only the events described in these conditions. If the sequence contains data that are not described with conditions the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described under these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
The function takes into account only the events described in these conditions. If the sequence contains data that are not described with conditions the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described under these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them. **Returned values** @@ -104,11 +104,11 @@ Type: `UInt8`. **Pattern syntax** -- `(?N)` — Matches the condition argument at the position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. +- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. -- `.*` — Matches any number of any events. You don't need the conditional arguments to match this element of the pattern. +- `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern. -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that distanced from each other for more than 1800 seconds. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators. +- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators. **Examples** @@ -133,7 +133,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) FROM t └───────────────────────────────────────────────────────────────────────┘ ``` -The function has found the event chain where number 2 follows number 1. It skipped number 3 between them, because the number is not described as an event. If we want to take this number into account when searching for the event chain, showed in the example, we should make a condition for it. +The function found the event chain where number 2 follows number 1. It skipped number 3 between them, because the number is not described as an event. If we want to take this number into account when searching for the event chain given in the example, we should make a condition for it. ```sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM t @@ -144,7 +144,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM └──────────────────────────────────────────────────────────────────────────────────────────┘ ``` -In this case the function couldn't find the event chain matching the pattern, because there is the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. +In this case, the function couldn't find the event chain matching the pattern, because the event for number 3 occured between 1 and 2. If in the same case we checked the condition for number 4, the sequence would match the pattern. ```sql SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t @@ -163,7 +163,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM ## sequenceCount(pattern)(time, cond1, cond2, ...) 
 
-Counts the number of event chains that matched the pattern. The function searches event chains that not overlap. It starts to search for the next chain after the current chain is matched.
+Counts the number of event chains that matched the pattern. The function searches event chains that don't overlap. It starts to search for the next chain after the current chain is matched.
 
 !!! warning "Warning"
     Events that occur at the same second may lie in the sequence in an undefined order affecting the result.
 
 ```sql
 sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 ```
 
@@ -176,14 +176,14 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 
 - `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
 
-- `timestamp` — Column that considered to contain time data. Typical data types are `Date`, and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
+- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes into account only the events described in these conditions. If the sequence contains data that are not described with conditions the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
 
 **Returned values**
 
-- Number of non-overlapping event chains that are matched
+- Number of non-overlapping event chains that are matched.
 
 Type: `UInt64`.
 
@@ -230,7 +230,7 @@ windowFunnel(window)(timestamp, cond1, cond2, cond3, ...)
 
 **Parameters:**
 
 - `window` — Length of the sliding window in seconds.
-- `timestamp` — Name of the column containing the timestamp. Data type support: `Date`,`DateTime`, and other unsigned integer types (note that though timestamp support `UInt64` type, there is a limitation it's value can't overflow maximum of Int64, which is 2^63 - 1).
+- `timestamp` — Name of the column containing the timestamp. Data types supported: `Date`, `DateTime`, and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can't exceed the Int64 maximum, which is 2^63 - 1).
 - `cond1`, `cond2`... — Conditions or data describing the chain of events. Data type: `UInt8`. Values can be 0 or 1.
 
 **Algorithm**
 
From 5364f76625eb7392e7f04396a2aebce92f4a5810 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Mon, 14 Oct 2019 16:50:16 +0300
Subject: [PATCH 028/222] Fix build.
--- dbms/src/Processors/Transforms/ReverseTransform.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Processors/Transforms/ReverseTransform.cpp b/dbms/src/Processors/Transforms/ReverseTransform.cpp
index e2fb66411aa..98f2bf54aa5 100644
--- a/dbms/src/Processors/Transforms/ReverseTransform.cpp
+++ b/dbms/src/Processors/Transforms/ReverseTransform.cpp
@@ -1,5 +1,5 @@
 #include <Processors/Transforms/ReverseTransform.h>
-#include
+#include
 
 namespace DB
 {
From d1fb23882089157576de77ef0799e34150dc9891 Mon Sep 17 00:00:00 2001
From: memo
Date: Tue, 15 Oct 2019 15:43:05 +0800
Subject: [PATCH 029/222] implement arrayCompact

---
 dbms/src/Functions/array/arrayCompact.cpp | 141 ++++++++++++++++++
 .../registerFunctionsHigherOrder.cpp | 2 +
 2 files changed, 143 insertions(+)
 create mode 100644 dbms/src/Functions/array/arrayCompact.cpp

diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp
new file mode 100644
index 00000000000..9f763a12ec3
--- /dev/null
+++ b/dbms/src/Functions/array/arrayCompact.cpp
@@ -0,0 +1,141 @@
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/ColumnsNumber.h>
+#include "FunctionArrayMapped.h"
+#include <Functions/FunctionFactory.h>
+
+
+namespace DB
+{
+
+    namespace ErrorCodes
+    {
+        extern const int ILLEGAL_COLUMN;
+        extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    }
+
+    struct ArrayCompactImpl
+    {
+        static bool needBoolean() { return false; }
+        static bool needExpression() { return false; }
+        static bool needOneArray() { return false; }
+
+        static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypePtr & /*array_element*/)
+        {
+            WhichDataType which(expression_return);
+
+            if (which.isNativeUInt())
+                return std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>());
+
+            if (which.isNativeInt())
+                return std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt64>());
+
+            if (which.isFloat())
+                return std::make_shared<DataTypeArray>(std::make_shared<DataTypeFloat64>());
+
+            throw Exception("arrayCompact cannot compact values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
+
+
+        template <typename Element, typename Result>
+        static bool executeType(const ColumnPtr & mapped, const ColumnArray & array, ColumnPtr & res_ptr)
+        {
+            const ColumnVector<Element> * column = checkAndGetColumn<ColumnVector<Element>>(&*mapped);
+
+            if (!column)
+            {
+                const ColumnConst * column_const = checkAndGetColumnConst<ColumnVector<Element>>(&*mapped);
+
+                if (!column_const)
+                    return false;
+
+                const Element x = column_const->template getValue<Element>();
+                const IColumn::Offsets & offsets = array.getOffsets();
+                auto column_data = ColumnVector<Result>::create(column_const->size());
+                typename ColumnVector<Result>::Container & res_values = column_data->getData();
+                auto column_offsets = ColumnArray::ColumnOffsets::create(offsets.size());
+                IColumn::Offsets & res_offsets = column_offsets->getData();
+
+                size_t res_pos = 0;
+                size_t pos = 0;
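+                /// One pass over the arrays: every element of a constant column is equal, so each non-empty array compacts to a single value.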
+                for (size_t i = 0; i < offsets.size(); ++i)
+                {
+                    if (pos < offsets[i])
+                    {
+                        res_values[res_pos++] = x;
+                        pos = offsets[i];
+                    }
+                    res_offsets[i] = res_pos;
+                }
+                for(size_t i = 0; i < column_data->size() - res_pos; ++i)
+                {
+                    res_values.pop_back();
+                }
+                res_ptr = ColumnArray::create(std::move(column_data), std::move(column_offsets));
+                return true;
+            }
+
+            const IColumn::Offsets & offsets = array.getOffsets();
+            const typename ColumnVector<Element>::Container & data = column->getData();
+            auto column_data = ColumnVector<Result>::create(data.size());
+            typename ColumnVector<Result>::Container & res_values = column_data->getData();
+            auto column_offsets = ColumnArray::ColumnOffsets::create(offsets.size());
+            IColumn::Offsets & res_offsets = column_offsets->getData();
+
+            size_t res_pos = 0;
+            size_t pos = 0;
+            for (size_t i = 0; i < offsets.size(); ++i)
+            {
+                if (pos < offsets[i])
+                {
+                    /// Keep the first element of each array, then only elements that differ from their predecessor.
+                    res_values[res_pos] = data[pos];
+                    for (++pos, ++res_pos; pos < offsets[i]; ++pos)
+                    {
+                        if (data[pos] != data[pos - 1])
+                        {
+                            res_values[res_pos++] = data[pos];
+                        }
+                    }
+                }
+                res_offsets[i] = res_pos;
+            }
+            for(size_t i = 0; i < data.size() - res_pos; ++i)
+            {
+                res_values.pop_back();
+            }
+            res_ptr = ColumnArray::create(std::move(column_data), std::move(column_offsets));
+            return true;
+        }
+
+        static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
+        {
+            ColumnPtr res;
+
+            if (executeType< UInt8 , UInt64>(mapped, array, res) ||
+                executeType< UInt16, UInt64>(mapped, array, res) ||
+                executeType< UInt32, UInt64>(mapped, array, res) ||
+                executeType< UInt64, UInt64>(mapped, array, res) ||
+                executeType< Int8  ,  Int64>(mapped, array, res) ||
+                executeType< Int16 ,  Int64>(mapped, array, res) ||
+                executeType< Int32 ,  Int64>(mapped, array, res) ||
+                executeType< Int64 ,  Int64>(mapped, array, res) ||
+                executeType<Float32, Float64>(mapped, array, res) ||
+                executeType<Float64, Float64>(mapped, array, res))
+                return res;
+            else
+                throw Exception("Unexpected column for arrayCompact: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN);
+        }
+
+    };
+
+    struct NameArrayCompact { static constexpr auto name = "arrayCompact"; };
+    using FunctionArrayCompact = FunctionArrayMapped<ArrayCompactImpl, NameArrayCompact>;
+
+    void registerFunctionArrayCompact(FunctionFactory & factory)
+    {
+        factory.registerFunction<FunctionArrayCompact>();
+    }
+
+}
+
diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
index e0948ebc913..c5f7f341a53 100644
--- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp
+++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
@@ -8,6 +8,7 @@ void registerFunctionArrayFilter(FunctionFactory &);
 void registerFunctionArrayCount(FunctionFactory &);
 void registerFunctionArrayExists(FunctionFactory &);
 void registerFunctionArrayAll(FunctionFactory &);
+void registerFunctionArrayCompact(FunctionFactory &);
 void registerFunctionArraySum(FunctionFactory &);
 void registerFunctionArrayFirst(FunctionFactory &);
 void registerFunctionArrayFirstIndex(FunctionFactory &);
@@ -24,6 +25,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory)
     registerFunctionArrayCount(factory);
     registerFunctionArrayExists(factory);
     registerFunctionArrayAll(factory);
+    registerFunctionArrayCompact(factory);
     registerFunctionArraySum(factory);
     registerFunctionArrayFirst(factory);
     registerFunctionArrayFirstIndex(factory);
 
From 8b1b7f2a5f938a28d4eba37b3a7e915e056140f9 Mon Sep 17 00:00:00 2001
From: memo
Date: Tue, 15 Oct 2019 16:04:59 +0800
Subject: [PATCH 030/222] add arrayCompact test

---
 .../0_stateless/01020_function_array_compact.reference | 9 +++++++++
 .../queries/0_stateless/01020_function_array_compact.sql | 9 +++++++++
 2 files changed, 18 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/01020_function_array_compact.reference
 create mode 100644 dbms/tests/queries/0_stateless/01020_function_array_compact.sql

diff --git a/dbms/tests/queries/0_stateless/01020_function_array_compact.reference b/dbms/tests/queries/0_stateless/01020_function_array_compact.reference
new file mode 100644
index 00000000000..6627a437251
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01020_function_array_compact.reference
@@ -0,0 +1,9 @@
+[0]
+[1]
+[2]
+[1]
+[1,2]
+[1,2]
+[1,2,1]
+[2,1]
+[1,2,3,4,5]
diff --git a/dbms/tests/queries/0_stateless/01020_function_array_compact.sql b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql
new file mode 100644
index 00000000000..ac309fe3f0a
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql
@@ -0,0 +1,9 @@
+select arrayCompact([0]);
+select arrayCompact([1]);
+select arrayCompact([2]);
+select arrayCompact([1,1]);
+select arrayCompact([1,2]);
+select arrayCompact([1,1,2]);
+select arrayCompact([1,2,1]);
+select arrayCompact([2,1,1]);
+select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]);
From 947c345eda602c0308180e1092134bab8f41a32f Mon Sep 17 00:00:00 2001
From: memo
Date: Wed, 16 Oct 2019 14:05:43 +0800
Subject: [PATCH 031/222] style: Normalize the format

---
 dbms/src/Functions/array/arrayCompact.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp
index 9f763a12ec3..0775c4cb7bb 100644
--- a/dbms/src/Functions/array/arrayCompact.cpp
+++ b/dbms/src/Functions/array/arrayCompact.cpp
@@ -6,7 +6,7 @@
 
 namespace DB
 {
-
+/// arrayCompact(['a', 'a', 'b', 'b', 'a']) = ['a', 'b', 'a'] - compact arrays
     namespace ErrorCodes
     {
         extern const int ILLEGAL_COLUMN;
@@ -68,7 +68,7 @@
                 }
                 res_offsets[i] = res_pos;
             }
-            for(size_t i = 0; i < column_data->size() - res_pos; ++i)
+            for (size_t i = 0; i < column_data->size() - res_pos; ++i)
             {
                 res_values.pop_back();
             }
@@ -92,7 +92,7 @@
                     res_values[res_pos] = data[pos];
                     for (++pos, ++res_pos; pos < offsets[i]; ++pos)
                     {
-                        if(data[pos] != data[pos - 1])
+                        if (data[pos] != data[pos - 1])
                         {
                             res_values[res_pos++] = data[pos];
                         }
@@ -100,7 +100,7 @@
                 res_offsets[i] = res_pos;
             }
-            for(size_t i = 0; i < data.size() - res_pos; ++i)
+            for (size_t i = 0; i < data.size() - res_pos; ++i)
             {
                 res_values.pop_back();
             }
From a2bf848e124a3118555362deaade2ce0f6eb0b13 Mon Sep 17 00:00:00 2001
From: "philip.han"
Date: Tue, 15 Oct 2019 13:22:51 +0900
Subject: [PATCH 032/222] Made bloom_filter type of index supporting LowCardinality and Nullable

---
 dbms/src/Interpreters/BloomFilter.cpp | 34 +++++
 dbms/src/Interpreters/BloomFilter.h | 6 +
 dbms/src/Interpreters/BloomFilterHash.h | 49 ++++--
 .../MergeTree/MergeTreeIndexBloomFilter.cpp | 10 +-
 .../MergeTreeIndexConditionBloomFilter.cpp | 12 +-
 .../00945_bloom_filter_index.reference | 105 +++++++++++++
 .../0_stateless/00945_bloom_filter_index.sql | 144 ++++++++++++++++++
 7 files changed, 335 insertions(+), 25 deletions(-)

diff --git a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp
index d648fd114f4..62897b6c774 100644
--- a/dbms/src/Interpreters/BloomFilter.cpp
+++ b/dbms/src/Interpreters/BloomFilter.cpp
@@ -1,5 +1,11 @@
 #include <Interpreters/BloomFilter.h>
 #include <city.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnNullable.h>
+#include <Columns/ColumnLowCardinality.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeLowCardinality.h>
 
 
 namespace DB
@@ -83,4 +89,32 @@ bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed
     return bool(filter[pos / (8 * sizeof(UnderType))] & (1ULL << (pos % (8 * sizeof(UnderType)))));
 }
 
+const DataTypePtr getPrimitiveType(const DataTypePtr data_type)
+{
+    if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
+        return getPrimitiveType(array_type->getNestedType());
+
+    if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
+        return getPrimitiveType(nullable_type->getNestedType());
+
+    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(data_type.get()))
+        return getPrimitiveType(low_cardinality_type->getDictionaryType());
+
+    return data_type;
+}
+
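+/// Column counterpart of getPrimitiveType: unwraps Array/Nullable/LowCardinality down to the underlying data column.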
+const ColumnPtr getPrimitiveColumn(const ColumnPtr column)
+{
+    if (const auto * array_col = typeid_cast<const ColumnArray *>(column.get()))
+        return getPrimitiveColumn(array_col->getDataPtr());
+
+    if (const auto * nullable_col = typeid_cast<const ColumnNullable *>(column.get()))
+        return getPrimitiveColumn(nullable_col->getNestedColumnPtr());
+
+    if (const auto * low_cardinality_col = typeid_cast<const ColumnLowCardinality *>(column.get()))
+        return getPrimitiveColumn(low_cardinality_col->convertToFullColumnIfLowCardinality());
+
+    return column;
+}
+
+}
diff --git a/dbms/src/Interpreters/BloomFilter.h b/dbms/src/Interpreters/BloomFilter.h
index 19469834c94..319a508e8c0 100644
--- a/dbms/src/Interpreters/BloomFilter.h
+++ b/dbms/src/Interpreters/BloomFilter.h
@@ -2,9 +2,12 @@
 
 #include
 #include
+#include <Columns/IColumn.h>
 #include
 #include
+#include <DataTypes/IDataType.h>
 #include
+#include
 
 namespace DB
 {
@@ -53,4 +56,7 @@ using BloomFilterPtr = std::shared_ptr<BloomFilter>;
 
 bool operator== (const BloomFilter & a, const BloomFilter & b);
 
+const DataTypePtr getPrimitiveType(const DataTypePtr data_type);
+const ColumnPtr getPrimitiveColumn(const ColumnPtr column);
+
 }
diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h
index 658f9790bee..0b458b6a7e9 100644
--- a/dbms/src/Interpreters/BloomFilterHash.h
+++ b/dbms/src/Interpreters/BloomFilterHash.h
@@ -10,9 +10,12 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
+#include
 
 namespace DB
 {
@@ -35,15 +38,38 @@ struct BloomFilterHash
         WhichDataType which(data_type);
 
         if (which.isUInt() || which.isDateOrDateTime())
-            return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet<UInt64>())), 1);
+            if (field.isNull() == false)
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(field.safeGet<UInt64>())), 1);
+            else
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(0)), 1);
         else if (which.isInt() || which.isEnum())
-            return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()))), 1);
+            if (field.isNull() == false)
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Int64>()))), 1);
+            else
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(0))), 1);
         else if (which.isFloat32() || which.isFloat64())
-            return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()))), 1);
+            if (field.isNull() == false)
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(field.safeGet<Float64>()))), 1);
+            else
+                return ColumnConst::create(ColumnUInt64::create(1, intHash64(ext::bit_cast<UInt64>(0))), 1);
         else if (which.isString() || which.isFixedString())
         {
-            const auto & value = field.safeGet<String>();
-            return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
+            if (field.isNull() == false)
+            {
+                const auto & value = field.safeGet<String>();
+                return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
+            }
+            else
+            {
+                if (which.isString())
+                    return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64("", 0)), 1);
+                else
+                {
+                    const DataTypeFixedString * fixed_string_type = typeid_cast<const DataTypeFixedString *>(data_type);
+                    const String value(fixed_string_type->getN(), '\0'); /// zero-filled buffer (a variable-length array is not standard C++)
+                    return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
+                }
+            }
         }
         else
             throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
     }
 
     static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit)
     {
-        const IColumn * actual_col = column.get();
-        const IDataType * actual_type = data_type.get();
-
         WhichDataType which(data_type);
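+        /// For arrays, hash the underlying element column; the offset and limit are remapped from array rows to elements below.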
if (which.isArray()) { @@ -62,17 +85,17 @@ struct BloomFilterHash if (checkAndGetColumn(array_col->getData())) throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR); - actual_col = array_col->getDataPtr().get(); - actual_type = static_cast(data_type.get())->getNestedType().get(); - const auto & offsets = array_col->getOffsets(); size_t offset = (pos == 0) ? 0 : offsets[pos - 1]; - limit = std::max(actual_col->size() - offset, limit); + limit = std::max(array_col->getDataPtr().get()->size() - offset, limit); } + const ColumnPtr actual_col = getPrimitiveColumn(column); + const DataTypePtr actual_type = getPrimitiveType(data_type); + auto index_column = ColumnUInt64::create(limit); ColumnUInt64::Container & index_column_vec = index_column->getData(); - getAnyTypeHash(actual_type, actual_col, index_column_vec, pos); + getAnyTypeHash(actual_type.get(), actual_col.get(), index_column_vec, pos); return index_column; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index 56c754cf979..7815cce35f1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -74,13 +75,8 @@ static void assertIndexColumnsType(const Block & header) for (size_t index = 0; index < columns_data_types.size(); ++index) { - WhichDataType which(columns_data_types[index]); - - if (which.isArray()) - { - const DataTypeArray * array_type = typeid_cast(columns_data_types[index].get()); - which = WhichDataType(array_type->getNestedType()); - } + const IDataType * actual_type = getPrimitiveType(columns_data_types[index]).get(); + WhichDataType which(actual_type); if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() && !which.isDateOrDateTime() && !which.isEnum()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 56a18122f29..488abb7f6a8 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -254,7 +254,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( size_t row_size = column->size(); size_t position = header.getPositionByName(key_ast->getColumnName()); const DataTypePtr & index_type = header.getByPosition(position).type; - const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); + const auto & converted_column = castColumn(ColumnWithTypeAndName{getPrimitiveColumn(column), getPrimitiveType(type), ""}, index_type, context); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); if (function_name == "in" || function_name == "globalIn") @@ -309,8 +309,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( if (!array_type) throw Exception("First argument for function has must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - Field converted_field = convertFieldToType(value_field, *array_type->getNestedType(), &*value_type); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*array_type->getNestedType(), converted_field))); + const DataTypePtr actual_type = getPrimitiveType(array_type->getNestedType()); + Field 
converted_field = convertFieldToType(value_field, *actual_type.get(), &*value_type); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field))); } else { @@ -318,8 +319,9 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( throw Exception("An array type of bloom_filter supports only has() function.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; - Field converted_field = convertFieldToType(value_field, *index_type, &*value_type); - out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(&*index_type, converted_field))); + const DataTypePtr actual_type = getPrimitiveType(index_type); + Field converted_field = convertFieldToType(value_field, *actual_type.get(), &*value_type); + out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithField(actual_type.get(), converted_field))); } return true; diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference index 332e97bf5a1..7e9362b5d33 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -28,6 +28,7 @@ 1 1 1 +100 1 1 1 @@ -70,3 +71,107 @@ 3 3 3 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +100 +1 +1 +1 +1 +100 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +3 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +2 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql index 03666dccd96..268574a609f 100755 --- a/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/dbms/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -47,6 +47,8 @@ SELECT COUNT() FROM test.bloom_filter_types_test WHERE date_time = toDateTime('1 SELECT COUNT() FROM test.bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; SELECT COUNT() FROM test.bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; +SELECT COUNT() FROM test.bloom_filter_types_test WHERE str IN ( SELECT str FROM test.bloom_filter_types_test); + DROP TABLE IF EXISTS test.bloom_filter_types_test; DROP TABLE IF EXISTS test.bloom_filter_array_types_test; @@ -102,3 +104,145 @@ SELECT COUNT() FROM test.bloom_filter_array_types_test WHERE has(str, '10'); SELECT COUNT() FROM test.bloom_filter_array_types_test WHERE has(fixed_string, toFixedString('10', 5)); DROP TABLE IF EXISTS test.bloom_filter_array_types_test; + +DROP TABLE IF EXISTS test.bloom_filter_null_types_test; + +CREATE TABLE test.bloom_filter_null_types_test (order_key UInt64, i8 Nullable(Int8), i16 Nullable(Int16), i32 Nullable(Int32), i64 Nullable(Int64), u8 Nullable(UInt8), u16 Nullable(UInt16), u32 Nullable(UInt32), u64 Nullable(UInt64), f32 Nullable(Float32), f64 Nullable(Float64), date Nullable(Date), date_time Nullable(DateTime('Europe/Moscow')), str Nullable(String), fixed_string Nullable(FixedString(5)), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_null_types_test 
SELECT number AS order_key, toInt8(number) AS i8, toInt16(number) AS i16, toInt32(number) AS i32, toInt64(number) AS i64, toUInt8(number) AS u8, toUInt16(number) AS u16, toUInt32(number) AS u32, toUInt64(number) AS u64, toFloat32(number) AS f32, toFloat64(number) AS f64, toDate(number, 'Europe/Moscow') AS date, toDateTime(number, 'Europe/Moscow') AS date_time, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; +INSERT INTO test.bloom_filter_null_types_test SELECT 0 AS order_key, NULL AS i8, NULL AS i16, NULL AS i32, NULL AS i64, NULL AS u8, NULL AS u16, NULL AS u32, NULL AS u64, NULL AS f32, NULL AS f64, NULL AS date, NULL AS date_time, NULL AS str, NULL AS fixed_string; + +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE i64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u8 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u16 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE u64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; + +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i8); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i16); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i32); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(i64); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u8); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u16); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u32); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(u64); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(f32); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(f64); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(date); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(date_time); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(str); +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE isNull(fixed_string); + +SELECT COUNT() FROM test.bloom_filter_null_types_test WHERE str IN ( SELECT str FROM test.bloom_filter_null_types_test); + +DROP TABLE IF EXISTS test.bloom_filter_null_types_test; + +DROP TABLE IF EXISTS test.bloom_filter_lc_null_types_test; + +CREATE TABLE 
test.bloom_filter_lc_null_types_test (order_key UInt64, str LowCardinality(Nullable(String)), fixed_string LowCardinality(Nullable(FixedString(5))), INDEX idx (str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_lc_null_types_test SELECT number AS order_key, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100; +INSERT INTO test.bloom_filter_lc_null_types_test SELECT 0 AS order_key, NULL AS str, NULL AS fixed_string; + +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6; +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12; + +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE isNull(str); +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE isNull(fixed_string); + +SELECT COUNT() FROM test.bloom_filter_lc_null_types_test WHERE str IN ( SELECT str FROM test.bloom_filter_lc_null_types_test); + +DROP TABLE IF EXISTS test.bloom_filter_lc_null_types_test; + +DROP TABLE IF EXISTS test.bloom_filter_array_lc_null_types_test; + +CREATE TABLE test.bloom_filter_array_lc_null_types_test (order_key Array(LowCardinality(Nullable((UInt64)))), i8 Array(LowCardinality(Nullable((Int8)))), i16 Array(LowCardinality(Nullable((Int16)))), i32 Array(LowCardinality(Nullable((Int32)))), i64 Array(LowCardinality(Nullable((Int64)))), u8 Array(LowCardinality(Nullable((UInt8)))), u16 Array(LowCardinality(Nullable((UInt16)))), u32 Array(LowCardinality(Nullable((UInt32)))), u64 Array(LowCardinality(Nullable((UInt64)))), f32 Array(LowCardinality(Nullable((Float32)))), f64 Array(LowCardinality(Nullable((Float64)))), date Array(LowCardinality(Nullable((Date)))), date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), str Array(LowCardinality(Nullable((String)))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 6; +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers LIMIT 15); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, 
groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 5 LIMIT 15); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT groupArray(number) AS order_key, groupArray(toInt8(number)) AS i8, groupArray(toInt16(number)) AS i16, groupArray(toInt32(number)) AS i32, groupArray(toInt64(number)) AS i64, groupArray(toUInt8(number)) AS u8, groupArray(toUInt16(number)) AS u16, groupArray(toUInt32(number)) AS u32, groupArray(toUInt64(number)) AS u64, groupArray(toFloat32(number)) AS f32, groupArray(toFloat64(number)) AS f64, groupArray(toDate(number, 'Europe/Moscow')) AS date, groupArray(toDateTime(number, 'Europe/Moscow')) AS date_time, groupArray(toString(number)) AS str, groupArray(toFixedString(toString(number), 5)) AS fixed_string FROM (SELECT number FROM system.numbers WHERE number >= 10 LIMIT 15); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT n AS order_key, n AS i8, n AS i16, n AS i32, n AS i64, n AS u8, n AS u16, n AS u32, n AS u64, n AS f32, n AS f64, n AS date, n AS date_time, n AS str, n AS fixed_string FROM (SELECT [NULL] AS n); +INSERT INTO test.bloom_filter_array_lc_null_types_test SELECT [NULL, n] AS order_key, [NULL, toInt8(n)] AS i8, [NULL, toInt16(n)] AS i16, [NULL, toInt32(n)] AS i32, [NULL, toInt64(n)] AS i64, [NULL, toUInt8(n)] AS u8, [NULL, toUInt16(n)] AS u16, [NULL, toUInt32(n)] AS u32, [NULL, toUInt64(n)] AS u64, [NULL, toFloat32(n)] AS f32, [NULL, toFloat64(n)] AS f64, [NULL, toDate(n, 'Europe/Moscow')] AS date, [NULL, toDateTime(n, 'Europe/Moscow')] AS date_time, [NULL, toString(n)] AS str, [NULL, toFixedString(toString(n), 5)] AS fixed_string FROM (SELECT 100 as n); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 1); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-02')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:01', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '1'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('1', 5)); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 5); +SELECT COUNT() FROM 
test.bloom_filter_array_lc_null_types_test WHERE has(u32, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 5); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-06')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:05', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '5'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('5', 5)); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 10); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-01-11')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:00:10', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '10'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('10', 5)); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i64, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, NULL); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, NULL); + +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i8, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i16, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(i32, 100); +SELECT COUNT() FROM 
test.bloom_filter_array_lc_null_types_test WHERE has(i64, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u8, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u16, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u32, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(u64, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f32, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(f64, 100); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date, toDate('1970-04-11')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(date_time, toDateTime('1970-01-01 03:01:40', 'Europe/Moscow')); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '100'); +SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5)); + +DROP TABLE IF EXISTS test.bloom_filter_array_lc_null_types_test; From ef09cedbb821be5a8d06311ece6fda2047c2574a Mon Sep 17 00:00:00 2001 From: memo Date: Thu, 17 Oct 2019 11:38:49 +0800 Subject: [PATCH 033/222] fix test bug --- .../01020_function_array_compact.sql | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/dbms/tests/queries/0_stateless/01020_function_array_compact.sql b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql index ac309fe3f0a..eea69dcb6da 100644 --- a/dbms/tests/queries/0_stateless/01020_function_array_compact.sql +++ b/dbms/tests/queries/0_stateless/01020_function_array_compact.sql @@ -1,9 +1,9 @@ -select arrayCompact([0]) -select arrayCompact([1]) -select arrayCompact([2]) -select arrayCompact([1,1]) -select arrayCompact([1,2]) -select arrayCompact([1,1,2]) -select arrayCompact([1,2,1]) -select arrayCompact([2,1,1]) -select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]) +select arrayCompact([0]); +select arrayCompact([1]); +select arrayCompact([2]); +select arrayCompact([1,1]); +select arrayCompact([1,2]); +select arrayCompact([1,1,2]); +select arrayCompact([1,2,1]); +select arrayCompact([2,1,1]); +select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]); From 08a348fbd10e6b1bc7202728d7a7f4e6e1209ebc Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Thu, 17 Oct 2019 19:08:36 +0900 Subject: [PATCH 034/222] Made IN operator support Array(LowCardinality(Nullable(String))) --- dbms/src/Interpreters/Set.cpp | 3 ++- .../queries/0_stateless/00688_low_cardinality_in.reference | 1 + dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Set.cpp b/dbms/src/Interpreters/Set.cpp index 68c219c3a91..330f0dc0287 100644 --- a/dbms/src/Interpreters/Set.cpp +++ b/dbms/src/Interpreters/Set.cpp @@ -424,7 +424,8 @@ void Set::checkColumnsNumber(size_t num_key_columns) const void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const { - if (!removeNullable(data_types[set_type_idx])->equals(*removeNullable(other_type))) + + if (!removeNullable(recursiveRemoveLowCardinality(data_types[set_type_idx]))->equals(*removeNullable(recursiveRemoveLowCardinality(other_type)))) throw Exception("Types of column " + toString(set_type_idx + 1) + " in section IN don't match: " + data_types[set_type_idx]->getName() + " on the right, " + other_type->getName() + " on the left.", ErrorCodes::TYPE_MISMATCH); diff --git 
a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference index 74266c7f888..8edea4d363a 100644 --- a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.reference @@ -10,3 +10,4 @@ a 1 b 1 1 1 2 1 +['1'] diff --git a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql index 09a96743847..cb57fad51a4 100644 --- a/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_in.sql @@ -9,3 +9,9 @@ select val, val in (select arrayJoin([1, 3])) from lc_00688; select str, str in (select str from lc_00688) from lc_00688; select val, val in (select val from lc_00688) from lc_00688; drop table if exists lc_00688; + +drop table if exists ary_lc_null; +CREATE TABLE ary_lc_null (i int, v Array(LowCardinality(Nullable(String)))) ENGINE = MergeTree() ORDER BY i ; +INSERT INTO ary_lc_null VALUES (1, ['1']); +SELECT v FROM ary_lc_null WHERE v IN (SELECT v FROM ary_lc_null); +drop table if exists ary_lc_null; From 8aac4d04de956c50d66fde4c6ece572906e013c9 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Thu, 17 Oct 2019 19:53:41 +0900 Subject: [PATCH 035/222] Fixed weird type-casting for IN-operator in bloom_filter --- dbms/src/Functions/FunctionHelpers.cpp | 8 ++++++++ .../MergeTree/MergeTreeIndexConditionBloomFilter.cpp | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 212a107e37c..4546b5dd8f0 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -4,8 +4,10 @@ #include #include #include +#include #include #include +#include #include @@ -75,6 +77,12 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un const auto & nested_col = checkAndGetColumn(const_column->getDataColumn())->getNestedColumnPtr(); res.insert({ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name}); } + else if (auto * low_cardinality = checkAndGetColumn(*col.column)) + { + const DataTypePtr & low_cardinality_type = static_cast(*col.type).getDictionaryType(); + const auto & low_cardinality_col = low_cardinality->convertToFullColumnIfLowCardinality(); + res.insert({low_cardinality_col, low_cardinality_type, col.name}); + } else throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 488abb7f6a8..dbb9a113244 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -254,7 +254,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTIn( size_t row_size = column->size(); size_t position = header.getPositionByName(key_ast->getColumnName()); const DataTypePtr & index_type = header.getByPosition(position).type; - const auto & converted_column = castColumn(ColumnWithTypeAndName{getPrimitiveColumn(column), getPrimitiveType(type), ""}, index_type, context); + const auto & converted_column = castColumn(ColumnWithTypeAndName{column, type, ""}, index_type, context); out.predicate.emplace_back(std::make_pair(position, BloomFilterHash::hashWithColumn(index_type, converted_column, 0, row_size))); if 
(function_name == "in" || function_name == "globalIn")

From 6e87eb9fd71b546155156abd9eb4bc0999c7a638 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Thu, 17 Oct 2019 16:47:19 +0300
Subject: [PATCH 036/222] DOCAPI-8163: EN review, RU translation.

---
 .../agg_functions/parametric_functions.md | 6 +-
 .../agg_functions/parametric_functions.md | 157 ++++++++++++++----
 2 files changed, 130 insertions(+), 33 deletions(-)

diff --git a/docs/en/query_language/agg_functions/parametric_functions.md b/docs/en/query_language/agg_functions/parametric_functions.md
index a044f7d97be..db946830c7e 100644
--- a/docs/en/query_language/agg_functions/parametric_functions.md
+++ b/docs/en/query_language/agg_functions/parametric_functions.md
@@ -89,7 +89,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
 
 - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described under these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
@@ -108,7 +108,7 @@ Type: `UInt8`.
 
 - `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern.
 
-- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators.
+- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lie between these events. You can use the `>=`, `>`, `<`, `<=` operators.
 
 **Examples**
 
@@ -178,7 +178,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
 
 - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../data_types/int_uint.md) data types.
 
-- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function only takes the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
+- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn't described in a condition, the function skips them.
**Returned values** diff --git a/docs/ru/query_language/agg_functions/parametric_functions.md b/docs/ru/query_language/agg_functions/parametric_functions.md index b0ece3ced11..a5db4598c3b 100644 --- a/docs/ru/query_language/agg_functions/parametric_functions.md +++ b/docs/ru/query_language/agg_functions/parametric_functions.md @@ -71,51 +71,148 @@ FROM В этом случае необходимо помнить, что границы корзин гистограммы не известны. -## sequenceMatch(pattern)(time, cond1, cond2, ...) +## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) {#function-sequencematch} -Сопоставление с образцом для цепочки событий. - -`pattern` - строка, содержащая шаблон для сопоставления. Шаблон похож на регулярное выражение. - -`time` - время события, тип DateTime - -`cond1`, `cond2` ... - от одного до 32 аргументов типа UInt8 - признаков, было ли выполнено некоторое условие для события. - -Функция собирает в оперативке последовательность событий. Затем производит проверку на соответствие этой последовательности шаблону. -Возвращает UInt8 - 0, если шаблон не подходит и 1, если шаблон подходит. - -Пример: `sequenceMatch('(?1).*(?2)')(EventTime, URL LIKE '%company%', URL LIKE '%cart%')` - -- была ли цепочка событий, в которой посещение страницы с адресом, содержащим company было раньше по времени посещения страницы с адресом, содержащим cart. - -Это вырожденный пример. Его можно записать с помощью других агрегатных функций: +Проверяет, содержит ли последовательность цепочку событий, которая соответствует шаблону. ```sql -minIf(EventTime, URL LIKE '%company%') < maxIf(EventTime, URL LIKE '%cart%'). +sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ``` -Но в более сложных случаях, такого решения нет. +!!! warning "Предупреждение" + События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции. -Синтаксис шаблонов: -`(?1)` - ссылка на условие (вместо 1 - любой номер); +**Параметры** -`.*` - произвольное количество любых событий; +- `pattern` — строка с шаблоном. Смотрите [Синтаксис шаблонов](#sequence-function-pattern-syntax). -`(?t>=1800)` - условие на время; +- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../data_types/int_uint.md). -за указанное время допускается любое количество любых событий; +- `cond1`, `cond2` — условия, описывающие цепочку событий. Тип данных — `UInt8`. Можно использовать до 32 условий. Функция учитывает только те события, которые указаны в условиях. Функция пропускает данные из последовательности, если они не описаны ни в одном из условий. -вместо `>=` могут использоваться операторы `<`, `>`, `<=`; -вместо 1800 может быть любое число; +**Возвращаемые значения** -События, произошедшие в одну секунду, могут оказаться в цепочке в произвольном порядке. От этого может зависеть результат работы функции. +- 1, если цепочка событий, соответствующая шаблону найдена. +- 0, если цепочка событий, соответствующая шаблону не найдена. -## sequenceCount(pattern)(time, cond1, cond2, ...) +Тип: `UInt8`. -Аналогично функции sequenceMatch, но возвращает не факт наличия цепочки событий, а UInt64 - количество найденных цепочек. -Цепочки ищутся без перекрытия. То есть, следующая цепочка может начаться только после окончания предыдущей. + +**Синтаксис шаблонов** + +- `(?N)` — соответствует условию на позиции `N`. Условия пронумерованы по порядку в диапазоне `[1, 32]`. 
Например, `(?1)` соответствует условию, заданному параметром `cond1`.
+
+- `.*` — соответствует любому количеству событий. Для этого элемента шаблона не надо задавать условия.
+
+- `(?t operator value)` — устанавливает время в секундах, которое должно разделять два события. Например, шаблон `(?1)(?t>1800)(?2)` соответствует событиям, которые произошли более чем через 1800 секунд друг от друга. Между этими событиями может находиться произвольное количество любых событий. Операторы могут быть `>=`, `>`, `<`, `<=`.
+
+**Примеры**
+
+Пусть таблица `t` содержит следующие данные:
+
+```text
+┌─time─┬─number─┐
+│ 1 │ 1 │
+│ 2 │ 3 │
+│ 3 │ 2 │
+└──────┴────────┘
+```
+
+Выполним запрос:
+
+```sql
+SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2) FROM t
+```
+```text
+┌─sequenceMatch('(?1)(?2)')(time, equals(number, 1), equals(number, 2))─┐
+│ 1 │
+└───────────────────────────────────────────────────────────────────────┘
+```
+
+Функция нашла цепочку событий, в которой число 2 следует за числом 1. Число 3 между ними было пропущено, поскольку оно не описано как событие. Если необходимо учесть это число при поиске цепочки событий, заданной в примере, то необходимо задать для него условие.
+
+```sql
+SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 3) FROM t
+```
+```text
+┌─sequenceMatch('(?1)(?2)')(time, equals(number, 1), equals(number, 2), equals(number, 3))─┐
+│ 0 │
+└──────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+В этом случае функция не может найти цепочку событий, соответствующую шаблону, поскольку событие для числа 3 произошло между 1 и 2. Если бы в этом же случае мы бы проверяли условие на событие для числа 4, то цепочка бы соответствовала шаблону.
+
+```sql
+SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM t
+```
+```text
+┌─sequenceMatch('(?1)(?2)')(time, equals(number, 1), equals(number, 2), equals(number, 4))─┐
+│ 1 │
+└──────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+
+**Смотрите также**
+
+- [sequenceCount](#function-sequencecount)
+
+
+## sequenceCount(pattern)(time, cond1, cond2, ...) {#function-sequencecount}
+
+Вычисляет количество цепочек событий, соответствующих шаблону. Функция отыскивает непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий.
+
+!!! warning "Предупреждение"
+    События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции.
+
+```sql
+sequenceCount(pattern)(timestamp, cond1, cond2, ...)
+```
+
+**Параметры**
+
+- `pattern` — строка с шаблоном. Смотрите [Синтаксис шаблонов](#sequence-function-pattern-syntax).
+
+- `timestamp` — столбец, содержащий метки времени. Типичный тип данных столбца — `Date` или `DateTime`. Также можно использовать любой из поддержанных типов данных [UInt](../../data_types/int_uint.md).
+
+- `cond1`, `cond2` — условия, описывающие цепочку событий. Тип данных — `UInt8`. Можно использовать до 32 условий. Функция учитывает только те события, которые указаны в условиях. Функция пропускает данные из последовательности, если они не описаны ни в одном из условий.
+
+**Возвращаемое значение**
+
+- Число непересекающихся цепочек событий, соответствующих шаблону.
+
+Тип: `UInt64`.
+
+**Пример**
+
+Пусть таблица `t` содержит следующие данные:
+
+```text
+┌─time─┬─number─┐
+│ 1 │ 1 │
+│ 2 │ 3 │
+│ 3 │ 2 │
+│ 4 │ 1 │
+│ 5 │ 3 │
+│ 6 │ 2 │
+└──────┴────────┘
+```
+
+Вычислим, сколько раз число 2 стоит после числа 1, причем между 1 и 2 могут быть любые числа:
+
+```sql
+SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
+```
+```text
+┌─sequenceCount('(?1).*(?2)')(time, equals(number, 1), equals(number, 2))─┐
+│ 2 │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+**Смотрите также**
+
+- [sequenceMatch](#function-sequencematch)
 
 ## windowFunnel(window)(timestamp, cond1, cond2, cond3, ...)

From 1d423fad7382c2fabd42e45afe96d23169331e29 Mon Sep 17 00:00:00 2001
From: "philip.han"
Date: Fri, 18 Oct 2019 11:55:01 +0900
Subject: [PATCH 037/222] Blocked "nested-array-type" for the bloom_filter

---
 dbms/src/Interpreters/BloomFilter.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Interpreters/BloomFilter.cpp b/dbms/src/Interpreters/BloomFilter.cpp
index 62897b6c774..150ddbb5e0c 100644
--- a/dbms/src/Interpreters/BloomFilter.cpp
+++ b/dbms/src/Interpreters/BloomFilter.cpp
@@ -92,7 +92,12 @@ bool BloomFilter::findHashWithSeed(const UInt64 & hash, const UInt64 & hash_seed
 const DataTypePtr getPrimitiveType(const DataTypePtr data_type)
 {
     if (const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get()))
-        return getPrimitiveType(array_type->getNestedType());
+    {
+        if (!typeid_cast<const DataTypeArray *>(array_type->getNestedType().get()))
+            return getPrimitiveType(array_type->getNestedType());
+        else
+            throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
+    }
 
     if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(data_type.get()))
         return getPrimitiveType(nullable_type->getNestedType());

From decd5553170cd377a84c5817d4b4e189784018b0 Mon Sep 17 00:00:00 2001
From: hcz
Date: Fri, 18 Oct 2019 15:30:41 +0800
Subject: [PATCH 038/222] Cleanup, keep function names consistent

---
 dbms/src/Functions/array/arraySort.cpp | 2 +-
 dbms/src/Functions/registerFunctionsHigherOrder.cpp | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Functions/array/arraySort.cpp b/dbms/src/Functions/array/arraySort.cpp
index 17a711e8902..35f03cd10ca 100644
--- a/dbms/src/Functions/array/arraySort.cpp
+++ b/dbms/src/Functions/array/arraySort.cpp
@@ -63,7 +63,7 @@ struct NameArrayReverseSort { static constexpr auto name = "arrayReverseSort"; }
 using FunctionArraySort = FunctionArrayMapped<ArraySortImpl<true>, NameArraySort>;
 using FunctionArrayReverseSort = FunctionArrayMapped<ArraySortImpl<false>, NameArrayReverseSort>;
 
-void registerFunctionsArraySort(FunctionFactory & factory)
+void registerFunctionArraySort(FunctionFactory & factory)
 {
     factory.registerFunction<FunctionArraySort>();
     factory.registerFunction<FunctionArrayReverseSort>();
diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
index 2e8b678240b..46e89850582 100644
--- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp
+++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp
@@ -12,8 +12,7 @@ void registerFunctionArraySum(FunctionFactory &);
 void registerFunctionArrayFirst(FunctionFactory &);
 void registerFunctionArrayFirstIndex(FunctionFactory &);
 void registerFunctionArraySplit(FunctionFactory &);
-void registerFunctionsArraySort(FunctionFactory &);
-void registerFunctionArrayReverseSort(FunctionFactory &);
+void registerFunctionArraySort(FunctionFactory &);
 void registerFunctionArrayCumSum(FunctionFactory &);
 void
registerFunctionArrayCumSumNonNegative(FunctionFactory &); void registerFunctionArrayDifference(FunctionFactory &); @@ -29,7 +28,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArrayFirst(factory); registerFunctionArrayFirstIndex(factory); registerFunctionArraySplit(factory); - registerFunctionsArraySort(factory); + registerFunctionArraySort(factory); registerFunctionArrayCumSum(factory); registerFunctionArrayCumSumNonNegative(factory); registerFunctionArrayDifference(factory); From 124ea9699acbfdbb99935d7aa0eccc926bb323c8 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 18 Oct 2019 16:45:39 +0800 Subject: [PATCH 039/222] Add arrayFill --- dbms/src/Functions/array/arrayFill.cpp | 127 ++++++++++++++++++ .../registerFunctionsHigherOrder.cpp | 2 + 2 files changed, 129 insertions(+) create mode 100644 dbms/src/Functions/array/arrayFill.cpp diff --git a/dbms/src/Functions/array/arrayFill.cpp b/dbms/src/Functions/array/arrayFill.cpp new file mode 100644 index 00000000000..4c2dc5659b8 --- /dev/null +++ b/dbms/src/Functions/array/arrayFill.cpp @@ -0,0 +1,127 @@ +#include +#include +#include "FunctionArrayMapped.h" +#include + + +namespace DB +{ + +template +struct ArrayFillImpl +{ + static bool needBoolean() { return true; } + static bool needExpression() { return true; } + static bool needOneArray() { return false; } + + static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element) + { + return std::make_shared(array_element); + } + + static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped) + { + const ColumnUInt8 * column_fill = typeid_cast(&*mapped); + + const IColumn & in_data = array.getData(); + const IColumn::Offsets & in_offsets = array.getOffsets(); + auto column_data = in_data.cloneEmpty(); + IColumn & out_data = *column_data.get(); + + if (column_fill) + { + const IColumn::Filter & fill = column_fill->getData(); + + size_t array_begin = 0; + size_t array_end = 0; + size_t begin = 0; + size_t end = 0; + + out_data.reserve(in_data.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + array_end = in_offsets[i] - 1; + + for (; end <= array_end; ++end) + { + if (end == array_end || fill[end + 1] != fill[begin]) { + if (fill[begin]) + { + if constexpr (Reverse) + { + if (end == array_end) + out_data.insertManyFrom(in_data, array_end, end + 1 - begin); + else + out_data.insertManyFrom(in_data, end + 1, end + 1 - begin); + } + else + { + if (begin == array_begin) + out_data.insertManyFrom(in_data, array_begin, end + 1 - begin); + else + out_data.insertManyFrom(in_data, begin - 1, end + 1 - begin); + } + } + else + out_data.insertRangeFrom(in_data, begin, end + 1 - begin); + + begin = end + 1; + } + } + + array_begin = array_end + 1; + } + } + else + { + auto column_fill_const = checkAndGetColumnConst(&*mapped); + + if (!column_fill_const) + throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_fill_const->getValue()) + { + size_t array_begin = 0; + size_t array_end = 0; + + out_data.reserve(in_data.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + array_end = in_offsets[i] - 1; + + if constexpr (Reverse) + out_data.insertManyFrom(in_data, array_end, array_end + 1 - array_begin); + else + out_data.insertManyFrom(in_data, array_begin, array_end + 1 - array_begin); + + array_begin = array_end + 1; + } + } + else + return ColumnArray::create( + array.getDataPtr(), + array.getOffsetsPtr() + ); + } + + return ColumnArray::create( + 
std::move(column_data), + array.getOffsetsPtr() + ); + } +}; + +struct NameArrayFill { static constexpr auto name = "arrayFill"; }; +struct NameArrayReverseFill { static constexpr auto name = "arrayReverseFill"; }; +using FunctionArrayFill = FunctionArrayMapped, NameArrayFill>; +using FunctionArrayReverseFill = FunctionArrayMapped, NameArrayReverseFill>; + +void registerFunctionArrayFill(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); +} + +} diff --git a/dbms/src/Functions/registerFunctionsHigherOrder.cpp b/dbms/src/Functions/registerFunctionsHigherOrder.cpp index 46e89850582..8511c0c412c 100644 --- a/dbms/src/Functions/registerFunctionsHigherOrder.cpp +++ b/dbms/src/Functions/registerFunctionsHigherOrder.cpp @@ -11,6 +11,7 @@ void registerFunctionArrayAll(FunctionFactory &); void registerFunctionArraySum(FunctionFactory &); void registerFunctionArrayFirst(FunctionFactory &); void registerFunctionArrayFirstIndex(FunctionFactory &); +void registerFunctionArrayFill(FunctionFactory &); void registerFunctionArraySplit(FunctionFactory &); void registerFunctionArraySort(FunctionFactory &); void registerFunctionArrayCumSum(FunctionFactory &); @@ -27,6 +28,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArraySum(factory); registerFunctionArrayFirst(factory); registerFunctionArrayFirstIndex(factory); + registerFunctionArrayFill(factory); registerFunctionArraySplit(factory); registerFunctionArraySort(factory); registerFunctionArrayCumSum(factory); From b87fe27cd6ad9f875d3bf5229667310cae56ccd0 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 18 Oct 2019 16:45:56 +0800 Subject: [PATCH 040/222] Add tests --- .../queries/0_stateless/01019_array_fill.reference | 10 ++++++++++ dbms/tests/queries/0_stateless/01019_array_fill.sql | 11 +++++++++++ 2 files changed, 21 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01019_array_fill.reference create mode 100644 dbms/tests/queries/0_stateless/01019_array_fill.sql diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.reference b/dbms/tests/queries/0_stateless/01019_array_fill.reference new file mode 100644 index 00000000000..97841f800c7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01019_array_fill.reference @@ -0,0 +1,10 @@ +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,1,1,1,1,1,1,1,1,1,1,1] +[16,16,16,16,16,16,16,16,16,16,16,16] +[1,1,1,11,12,13,13,13,13,14,15,16] +[11,11,11,11,12,13,14,14,14,14,15,16] +[1,1,3,11,12,12,12,5,6,14,14,14] +[1,3,3,11,12,5,5,5,6,14,NULL,NULL] +[1,1,3,11,11,11,11,5,6,14,14,14] +[3,3,3,11,5,5,5,5,6,14,16,16] diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.sql b/dbms/tests/queries/0_stateless/01019_array_fill.sql new file mode 100644 index 00000000000..33e064d8cb1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01019_array_fill.sql @@ -0,0 +1,11 @@ +SELECT arrayFill(x -> 0, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayReverseFill(x -> 0, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayFill(x -> 1, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayReverseFill(x -> 1, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); + +SELECT arrayFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayReverseFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); +SELECT arrayFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT arrayReverseFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT 
arrayFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]);
+SELECT arrayReverseFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]);

From 5f1b14e313d71e78830fd81a791cb7911a565cfa Mon Sep 17 00:00:00 2001
From: "philip.han"
Date: Fri, 18 Oct 2019 17:41:54 +0900
Subject: [PATCH 041/222] Remove a dynamically sized array in BloomFilterHash.h

---
 dbms/src/Interpreters/BloomFilterHash.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Interpreters/BloomFilterHash.h b/dbms/src/Interpreters/BloomFilterHash.h
index 0b458b6a7e9..e2af70b7ad8 100644
--- a/dbms/src/Interpreters/BloomFilterHash.h
+++ b/dbms/src/Interpreters/BloomFilterHash.h
@@ -66,8 +66,8 @@ struct BloomFilterHash
             else
             {
                 const DataTypeFixedString * fixed_string_type = typeid_cast<const DataTypeFixedString *>(data_type);
-                const char value[fixed_string_type->getN()] = { 0, };
-                return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(&value[0], fixed_string_type->getN())), 1);
+                const std::vector<char> value(fixed_string_type->getN(), 0);
+                return ColumnConst::create(ColumnUInt64::create(1, CityHash_v1_0_2::CityHash64(value.data(), value.size())), 1);
             }
         }
     }

From 73e7131051d3ef20aebecbb0e39f4760ad18e952 Mon Sep 17 00:00:00 2001
From: achimbab <36371084+achimbab@users.noreply.github.com>
Date: Fri, 18 Oct 2019 22:29:42 +0900
Subject: [PATCH 042/222] Fixed mergetree.md about types and a function for bloom_filter.

---
 docs/en/operations/table_engines/mergetree.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md
index a8d4d62f2d0..b41816996b5 100644
--- a/docs/en/operations/table_engines/mergetree.md
+++ b/docs/en/operations/table_engines/mergetree.md
@@ -306,9 +306,9 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
 
     The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.
 
-    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`.
+    Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`.
 
-    The following functions can use it: [equals](../../query_language/functions/comparison_functions.md), [notEquals](../../query_language/functions/comparison_functions.md), [in](../../query_language/functions/in_functions.md), [notIn](../../query_language/functions/in_functions.md).
+    The following functions can use it: [equals](../../query_language/functions/comparison_functions.md), [notEquals](../../query_language/functions/comparison_functions.md), [in](../../query_language/functions/in_functions.md), [notIn](../../query_language/functions/in_functions.md), [has](../../query_language/functions/array_functions.md).
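    As a rough illustration of the new coverage (the table and column names below are invented for this example; the shape mirrors the `00945_bloom_filter_index` tests added earlier in this series), an `Array` column can carry a `bloom_filter` index that is used by `has`:

```sql
-- Illustrative sketch only: names are invented, not taken from the patch.
CREATE TABLE bf_array_example
(
    order_key UInt64,
    tags Array(LowCardinality(Nullable(String))),
    INDEX tags_idx (tags) TYPE bloom_filter GRANULARITY 1
) ENGINE = MergeTree() ORDER BY order_key;

SELECT count() FROM bf_array_example WHERE has(tags, 'clickhouse');
```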
```sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 From 2be06255b12db66ec020313e194961a2293e0dbf Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 18 Oct 2019 21:31:18 +0800 Subject: [PATCH 043/222] Update arrayFill.cpp --- dbms/src/Functions/array/arrayFill.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/array/arrayFill.cpp b/dbms/src/Functions/array/arrayFill.cpp index 4c2dc5659b8..b35c729010e 100644 --- a/dbms/src/Functions/array/arrayFill.cpp +++ b/dbms/src/Functions/array/arrayFill.cpp @@ -45,7 +45,8 @@ struct ArrayFillImpl for (; end <= array_end; ++end) { - if (end == array_end || fill[end + 1] != fill[begin]) { + if (end == array_end || fill[end + 1] != fill[begin]) + { if (fill[begin]) { if constexpr (Reverse) From 5e32bf1ae7eaa4b22d5913084f91117a273bbbb4 Mon Sep 17 00:00:00 2001 From: "philip.han" Date: Sat, 19 Oct 2019 21:38:45 +0900 Subject: [PATCH 044/222] Removed unchecked type-casting --- dbms/src/Functions/FunctionHelpers.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 4546b5dd8f0..0ab342b84ba 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -79,9 +79,8 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un } else if (auto * low_cardinality = checkAndGetColumn(*col.column)) { - const DataTypePtr & low_cardinality_type = static_cast(*col.type).getDictionaryType(); const auto & low_cardinality_col = low_cardinality->convertToFullColumnIfLowCardinality(); - res.insert({low_cardinality_col, low_cardinality_type, col.name}); + res.insert({low_cardinality_col, nested_type, col.name}); } else throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN); From 79a27ece1a9654bc66596dabefe882be6ce41b71 Mon Sep 17 00:00:00 2001 From: hcz Date: Mon, 21 Oct 2019 11:19:11 +0800 Subject: [PATCH 045/222] Flip the condition column in arrayFill --- dbms/src/Functions/array/arrayFill.cpp | 40 +++++++++---------- .../0_stateless/01019_array_fill.reference | 8 ++-- .../queries/0_stateless/01019_array_fill.sql | 8 ++-- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/dbms/src/Functions/array/arrayFill.cpp b/dbms/src/Functions/array/arrayFill.cpp index 4c2dc5659b8..1b6d72027e0 100644 --- a/dbms/src/Functions/array/arrayFill.cpp +++ b/dbms/src/Functions/array/arrayFill.cpp @@ -47,6 +47,8 @@ struct ArrayFillImpl { if (end == array_end || fill[end + 1] != fill[begin]) { if (fill[begin]) + out_data.insertRangeFrom(in_data, begin, end + 1 - begin); + else { if constexpr (Reverse) { @@ -63,8 +65,6 @@ struct ArrayFillImpl out_data.insertManyFrom(in_data, begin - 1, end + 1 - begin); } } - else - out_data.insertRangeFrom(in_data, begin, end + 1 - begin); begin = end + 1; } @@ -81,29 +81,27 @@ struct ArrayFillImpl throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); if (column_fill_const->getValue()) - { - size_t array_begin = 0; - size_t array_end = 0; - - out_data.reserve(in_data.size()); - - for (size_t i = 0; i < in_offsets.size(); ++i) - { - array_end = in_offsets[i] - 1; - - if constexpr (Reverse) - out_data.insertManyFrom(in_data, array_end, array_end + 1 - array_begin); - else - out_data.insertManyFrom(in_data, array_begin, array_end + 1 - array_begin); - - array_begin = array_end + 1; - } - } - else return ColumnArray::create( array.getDataPtr(), array.getOffsetsPtr() ); + + size_t array_begin 
= 0; + size_t array_end = 0; + + out_data.reserve(in_data.size()); + + for (size_t i = 0; i < in_offsets.size(); ++i) + { + array_end = in_offsets[i] - 1; + + if constexpr (Reverse) + out_data.insertManyFrom(in_data, array_end, array_end + 1 - array_begin); + else + out_data.insertManyFrom(in_data, array_begin, array_end + 1 - array_begin); + + array_begin = array_end + 1; + } } return ColumnArray::create( diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.reference b/dbms/tests/queries/0_stateless/01019_array_fill.reference index 97841f800c7..08982beb62e 100644 --- a/dbms/tests/queries/0_stateless/01019_array_fill.reference +++ b/dbms/tests/queries/0_stateless/01019_array_fill.reference @@ -1,9 +1,9 @@ -[1,2,3,11,12,13,4,5,6,14,15,16] -[1,2,3,11,12,13,4,5,6,14,15,16] [1,1,1,1,1,1,1,1,1,1,1,1] [16,16,16,16,16,16,16,16,16,16,16,16] -[1,1,1,11,12,13,13,13,13,14,15,16] -[11,11,11,11,12,13,14,14,14,14,15,16] +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,2,3,11,12,13,4,5,6,14,15,16] +[1,2,3,3,3,3,4,5,6,6,6,6] +[1,2,3,4,4,4,4,5,6,16,16,16] [1,1,3,11,12,12,12,5,6,14,14,14] [1,3,3,11,12,5,5,5,6,14,NULL,NULL] [1,1,3,11,11,11,11,5,6,14,14,14] diff --git a/dbms/tests/queries/0_stateless/01019_array_fill.sql b/dbms/tests/queries/0_stateless/01019_array_fill.sql index 33e064d8cb1..af48e8d0be4 100644 --- a/dbms/tests/queries/0_stateless/01019_array_fill.sql +++ b/dbms/tests/queries/0_stateless/01019_array_fill.sql @@ -5,7 +5,7 @@ SELECT arrayReverseFill(x -> 1, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); SELECT arrayFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); SELECT arrayReverseFill(x -> x < 10, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16]); -SELECT arrayFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); -SELECT arrayReverseFill(x -> isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); -SELECT arrayFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]); -SELECT arrayReverseFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1]); +SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]); +SELECT arrayFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0]); +SELECT arrayReverseFill((x, y) -> y, [1, 2, 3, 11, 12, 13, 4, 5, 6, 14, 15, 16], [0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0]); From f7d2e1b758703a3dc6bfb0d6e314c0aaff8fbbf1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sun, 20 Oct 2019 12:12:42 +0300 Subject: [PATCH 046/222] Added Pipe class. Updated MergeTreeDataSelectExecutor. 
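A Pipe owns a chain of connected processors with a single open output port, and
TreeExecutor now consumes a Pipe instead of a bare processor list. A minimal
usage sketch follows; SomeSource, SomeTransform and handle() are invented
placeholders, not code added by this patch:

    // Sketch only: SomeSource is any concrete single-output ISource,
    // SomeTransform any 1-in/1-out transform, handle() a placeholder consumer.
    Pipe pipe(std::make_shared<SomeSource>());                  // validated by checkSource()
    pipe.addSimpleTransform(std::make_shared<SomeTransform>()); // connects to the open output
    TreeExecutor stream(std::move(pipe));                       // IBlockInputStream over the tree
    while (Block block = stream.read())
        handle(block);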
--- .../src/Processors/Executors/TreeExecutor.cpp | 14 +-- dbms/src/Processors/Executors/TreeExecutor.h | 10 +- dbms/src/Processors/IProcessor.h | 3 + dbms/src/Processors/ISource.h | 2 + dbms/src/Processors/Pipe.cpp | 87 +++++++++++++ dbms/src/Processors/Pipe.h | 37 ++++++ dbms/src/Storages/IStorage.h | 4 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 117 ++++++------------ dbms/src/Storages/StorageMergeTree.cpp | 1 + .../src/Storages/StorageReplicatedMergeTree.h | 1 + 10 files changed, 185 insertions(+), 91 deletions(-) create mode 100644 dbms/src/Processors/Pipe.cpp create mode 100644 dbms/src/Processors/Pipe.h diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutor.cpp index 94e2dfe5b5a..b53b32455ab 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutor.cpp @@ -79,13 +79,13 @@ void TreeExecutor::init() if (processors.empty()) throw Exception("No processors were passed to TreeExecutor.", ErrorCodes::LOGICAL_ERROR); - root = processors.back().get(); + root = &output_port.getProcessor(); validateTree(processors, root, sources_with_progress); - port = std::make_unique(getHeader(), root); - connect(root->getOutputs().front(), *port); - port->setNeeded(); + input_port = std::make_unique(getHeader(), root); + connect(output_port, *input_port); + input_port->setNeeded(); } void TreeExecutor::execute() @@ -170,11 +170,11 @@ Block TreeExecutor::readImpl() { while (true) { - if (port->isFinished()) + if (input_port->isFinished()) return {}; - if (port->hasData()) - return getHeader().cloneWithColumns(port->pull().detachColumns()); + if (input_port->hasData()) + return getHeader().cloneWithColumns(input_port->pull().detachColumns()); execute(); } diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutor.h index 51fc82200b8..d4817d6c99b 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.h +++ b/dbms/src/Processors/Executors/TreeExecutor.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include namespace DB { @@ -18,7 +18,10 @@ public: /// * processors form a tree /// * all processors are attainable from root /// * there is no other connected processors - explicit TreeExecutor(Processors processors_) : processors(std::move(processors_)) { init(); } + explicit TreeExecutor(Pipe pipe) : output_port(pipe.getPort()), processors(std::move(pipe).detachProcessors()) + { + init(); + } String getName() const override { return root->getName(); } Block getHeader() const override { return root->getOutputs().front().getHeader(); } @@ -35,9 +38,10 @@ protected: Block readImpl() override; private: + OutputPort & output_port; Processors processors; IProcessor * root = nullptr; - std::unique_ptr port; + std::unique_ptr input_port; /// Remember sources that support progress. std::vector sources_with_progress; diff --git a/dbms/src/Processors/IProcessor.h b/dbms/src/Processors/IProcessor.h index 7a9a6fee755..ed59f4e591d 100644 --- a/dbms/src/Processors/IProcessor.h +++ b/dbms/src/Processors/IProcessor.h @@ -226,6 +226,9 @@ public: auto & getInputs() { return inputs; } auto & getOutputs() { return outputs; } + const auto & getInputs() const { return inputs; } + const auto & getOutputs() const { return outputs; } + /// Debug output. 
void dump() const; diff --git a/dbms/src/Processors/ISource.h b/dbms/src/Processors/ISource.h index b1669860192..9be21c3a398 100644 --- a/dbms/src/Processors/ISource.h +++ b/dbms/src/Processors/ISource.h @@ -27,4 +27,6 @@ public: const OutputPort & getPort() const { return output; } }; +using SourcePtr = std::shared_ptr; + } diff --git a/dbms/src/Processors/Pipe.cpp b/dbms/src/Processors/Pipe.cpp new file mode 100644 index 00000000000..4511b468061 --- /dev/null +++ b/dbms/src/Processors/Pipe.cpp @@ -0,0 +1,87 @@ +#include +#include + +namespace DB +{ + +static void checkSingleInput(const IProcessor & transform) +{ + if (transform.getInputs().size() != 1) + throw Exception("Processor for pipe should have single input, " + "but " + transform.getName() + " has " + + toString(transform.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkMultipleInputs(const IProcessor & transform, size_t num_inputs) +{ + if (transform.getInputs().size() != num_inputs) + throw Exception("Processor for pipe should have " + toString(num_inputs) + " inputs, " + "but " + transform.getName() + " has " + + toString(transform.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkSingleOutput(const IProcessor & transform) +{ + if (transform.getOutputs().size() != 1) + throw Exception("Processor for pipe should have single output, " + "but " + transform.getName() + " has " + + toString(transform.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); +} + +static void checkSimpleTransform(const IProcessor & transform) +{ + checkSingleInput(transform); + checkSingleOutput(transform); +} + +static void checkSource(const IProcessor & source) +{ + if (!source.getInputs().empty()) + throw Exception("Source for pipe shouldn't have any input, but " + source.getName() + " has " + + toString(source.getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); + + if (source.getOutputs().empty()) + throw Exception("Source for pipe should have single output, but it doesn't have any", + ErrorCodes::LOGICAL_ERROR); + + if (source.getOutputs().size() != 1) + throw Exception("Source for pipe should have single output, but " + source.getName() + " has " + + toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); +} + + +Pipe::Pipe(ProcessorPtr source) +{ + checkSource(*source); + output_port = &source->getOutputs().front(); + processors.emplace_back(std::move(source)); +} + +Pipe::Pipe(Pipes && pipes, ProcessorPtr transform) +{ + checkSingleOutput(*transform); + checkMultipleInputs(*transform, pipes.size()); + + auto it = transform->getInputs().begin(); + + for (auto & pipe : pipes) + { + connect(*pipe.output_port, *it); + ++it; + + processors.insert(processors.end(), pipe.processors.begin(), pipe.processors.end()); + } + + output_port = &transform->getOutputs().front(); + processors.emplace_back(std::move(transform)); +} + +void Pipe::addSimpleTransform(ProcessorPtr transform) +{ + checkSimpleTransform(*transform); + connect(*output_port, transform->getInputs().front()); + output_port = &transform->getOutputs().front(); + processors.emplace_back(std::move(transform)); +} + +} diff --git a/dbms/src/Processors/Pipe.h b/dbms/src/Processors/Pipe.h new file mode 100644 index 00000000000..55b397c82d6 --- /dev/null +++ b/dbms/src/Processors/Pipe.h @@ -0,0 +1,37 @@ +#include + +namespace DB +{ + +class Pipe; +using Pipes = std::vector; + +/// Pipe is a set of processors which represents the part of pipeline with single output. 
+/// All processors in pipe are connected. All ports are connected except the output one. +class Pipe +{ +public: + explicit Pipe(ProcessorPtr source); + Pipe(Pipes && pipes, ProcessorPtr transform); + + Pipe(const Pipe & other) = delete; + Pipe(Pipe && other) = default; + + Pipe & operator=(const Pipe & other) = delete; + Pipe & operator=(Pipe && other) = default; + + OutputPort & getPort() const { return *output_port; } + const Block & getHeader() const { return output_port->getHeader(); } + + /// Add transform to pipe. It must have single input and single output (is checked). + /// Input will be connected with current output port, output port will be updated. + void addSimpleTransform(ProcessorPtr transform); + + Processors detachProcessors() && { return std::move(processors); } + +private: + Processors processors; + OutputPort * output_port = nullptr; +}; + +} diff --git a/dbms/src/Storages/IStorage.h b/dbms/src/Storages/IStorage.h index b224f84be97..6958d7be54b 100644 --- a/dbms/src/Storages/IStorage.h +++ b/dbms/src/Storages/IStorage.h @@ -45,7 +45,9 @@ class PartitionCommands; class IProcessor; using ProcessorPtr = std::shared_ptr; using Processors = std::vector; -using Pipes = std::vector; + +class Pipe; +using Pipes = std::vector; struct ColumnSize { diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 3be9c2cc0dc..80c8b337536 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -632,34 +632,23 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( if (use_sampling) { for (auto & pipe : res) - { - auto & output = pipe.back()->getOutputs().front(); - pipe.emplace_back(std::make_shared(output.getHeader(), filter_expression, filter_function->getColumnName(), false)); - connect(output, pipe.back()->getInputs().front()); - } + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), filter_expression, filter_function->getColumnName(), false)); } /// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values. 
if (sample_factor_column_queried) { for (auto & pipe : res) - { - auto & output = pipe.back()->getOutputs().front(); - pipe.emplace_back(std::make_shared>( - output.getHeader(), std::make_shared(), used_sample_factor, "_sample_factor")); - connect(output, pipe.back()->getInputs().front()); - } + pipe.addSimpleTransform(std::make_shared>( + pipe.getHeader(), std::make_shared(), used_sample_factor, "_sample_factor")); } if (query_info.prewhere_info && query_info.prewhere_info->remove_columns_actions) { for (auto & pipe : res) - { - auto & output = pipe.back()->getOutputs().front(); - pipe.emplace_back(std::make_shared( - output.getHeader(), query_info.prewhere_info->remove_columns_actions)); - connect(output, pipe.back()->getInputs().front()); - } + pipe.addSimpleTransform(std::make_shared( + pipe.getHeader(), query_info.prewhere_info->remove_columns_actions)); } return res; @@ -760,7 +749,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( source->addTotalRowsApprox(total_rows); } - res.push_back({std::move(source)}); + res.emplace_back(std::move(source)); } } else if (sum_marks > 0) @@ -833,7 +822,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - res.push_back({std::move(source_processor)}); + res.emplace_back(std::move(source_processor)); } } @@ -892,10 +881,10 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( if (sum_marks > max_marks_to_use_cache) use_uncompressed_cache = false; - Pipes pipes; + Pipes res; if (sum_marks == 0) - return pipes; + return res; /// Let's split ranges to avoid reading much data. auto split_ranges = [rows_granularity = data_settings->index_granularity, max_block_size](const auto & ranges, int direction) @@ -949,8 +938,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( { size_t need_marks = min_marks_per_stream; - std::vector streams_per_thread; - Processors pipe; + Pipes pipes; /// Loop over parts. 
/// We will iteratively take part or some subrange of a part from the back @@ -1012,58 +1000,44 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( if (sorting_info->direction == 1) { - pipe.push_back({std::make_shared( + pipes.emplace_back(std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)); } else { - pipe.push_back({std::make_shared( + pipes.emplace_back(std::make_shared( data, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, use_uncompressed_cache, query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, - settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)}); + settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query)); - auto & output = pipe.back()->getOutputs().front(); - auto reverse_processor = std::make_shared(output.getHeader()); - connect(output, reverse_processor->getInputs().front()); - pipe.emplace_back(std::move(reverse_processor)); + pipes.back().addSimpleTransform(std::make_shared(pipes.back().getHeader())); } - - streams_per_thread.emplace_back(&pipe.back()->getOutputs().front()); } - if (streams_per_thread.size() > 1) + if (pipes.size() > 1) { SortDescription sort_description; for (size_t j = 0; j < query_info.sorting_info->prefix_order_descr.size(); ++j) sort_description.emplace_back(data.sorting_key_columns[j], sorting_info->direction, 1); - for (auto & stream : streams_per_thread) - { - pipe.emplace_back(std::make_shared(stream->getHeader(), sorting_key_prefix_expr)); - connect(*stream, pipe.back()->getInputs().front()); - stream = &pipe.back()->getOutputs().front(); - } + for (auto & pipe : pipes) + pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), sorting_key_prefix_expr)); - pipe.push_back(std::make_shared( - streams_per_thread.back()->getHeader(), streams_per_thread.size(), sort_description, max_block_size)); + auto merging_sorted = std::make_shared( + pipes.back().getHeader(), pipes.size(), sort_description, max_block_size); - auto it = streams_per_thread.begin(); - for (auto & input : pipe.back()->getInputs()) - { - connect(**it, input); - ++it; - } + res.emplace_back(std::move(pipes), std::move(merging_sorted)); } - - pipes.push_back(std::move(pipe)); + else + res.emplace_back(std::move(pipes.front())); } - return pipes; + return res; } @@ -1102,7 +1076,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( use_uncompressed_cache = false; Pipes pipes; - std::vector to_merge; /// NOTE `merge_tree_uniform_read_distribution` is not used for FINAL @@ -1116,13 +1089,8 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( query_info.prewhere_info, true, settings.min_bytes_to_use_direct_io, settings.max_read_buffer_size, true, virt_columns, part.part_index_in_query); - auto & output = source_processor->getPort(); - auto expression_transform = std::make_shared(output.getHeader(), data.sorting_key_expr); - connect(output, expression_transform->getInputPort()); - - to_merge.emplace_back(&expression_transform->getOutputPort()); - - Processors pipe { 
std::move(source_processor), std::move(expression_transform) }; + Pipe pipe(std::move(source_processor)); + pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), data.sorting_key_expr)); pipes.emplace_back(std::move(pipe)); } @@ -1131,31 +1099,34 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); - Block header = to_merge.at(0)->getHeader(); + Block header = pipes.at(0).getHeader(); for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); auto streams_to_merge = [&]() { - size_t num_streams = to_merge.size(); + size_t num_streams = pipes.size(); BlockInputStreams streams; streams.reserve(num_streams); for (size_t i = 0; i < num_streams; ++i) - streams.emplace_back(std::make_shared(pipes[i])); + streams.emplace_back(std::make_shared(std::move(pipes[i]))); pipes.clear(); return streams; }; - ProcessorPtr merged_processor; BlockInputStreamPtr merged; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: - merged_processor = std::make_shared(header, to_merge.size(), sort_description, max_block_size); + { + auto merged_processor = + std::make_shared(header, pipes.size(), sort_description, max_block_size); + pipes.emplace_back(std::move(pipes), std::move(merged_processor)); break; + } case MergeTreeData::MergingParams::Collapsing: merged = std::make_shared( @@ -1186,23 +1157,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( } if (merged) - return {{std::make_shared(merged)}}; + pipes.emplace_back(std::make_shared(merged)); - auto it = to_merge.begin(); - for (auto & input : merged_processor->getInputs()) - { - connect(**it, input); - ++it; - } - - Processors result; - result.reserve(2 * pipes.size() + 1); - for (auto & pipe : pipes) - for (auto & processor : pipe) - result.emplace_back(std::move(processor)); - - result.emplace_back(merged_processor); - return {result}; + return pipes; } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index df541810258..243072b6d07 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -25,6 +25,7 @@ #include #include #include +#include namespace DB diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 90d36c1369e..fc6421b7a1c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -27,6 +27,7 @@ #include #include #include +#include namespace DB From 4ca83a8eb58ebbf1bd091e4d4d817a7ce8035fff Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 21 Oct 2019 18:24:15 +0300 Subject: [PATCH 047/222] Disable processors by default. --- dbms/src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 8f2474982a0..c65b7eb17c6 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -380,7 +380,7 @@ struct Settings : public SettingsCollection \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ - M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \ + M(SettingBool, allow_experimental_low_cardinality_type, false, "Obsolete setting, does nothing. 
Will be removed after 2019-08-13") \ M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13") \ DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) From 2b334a4adbeb5146e382b75b7b13b38accdc9244 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 21 Oct 2019 19:26:29 +0300 Subject: [PATCH 048/222] Review fixes. --- dbms/src/DataStreams/ExecutionSpeedLimits.cpp | 33 +++++++++-------- dbms/src/DataStreams/ExecutionSpeedLimits.h | 13 +++---- .../Interpreters/InterpreterSelectQuery.cpp | 8 ++--- dbms/src/Processors/Chunk.cpp | 9 +++++ dbms/src/Processors/Chunk.h | 7 ++-- ...r.cpp => TreeExecutorBlockInputStream.cpp} | 36 +++++++++---------- ...cutor.h => TreeExecutorBlockInputStream.h} | 6 ++-- .../Transforms/AddingConstColumnTransform.h | 6 +--- .../Transforms/MergingSortedTransform.cpp | 2 +- dbms/src/Storages/IStorage.cpp | 4 +-- .../MergeTreeBaseSelectProcessor.cpp | 12 +++---- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 6 ++-- .../MergeTree/StorageFromMergeTreeDataPart.h | 4 +-- 13 files changed, 79 insertions(+), 67 deletions(-) rename dbms/src/Processors/Executors/{TreeExecutor.cpp => TreeExecutorBlockInputStream.cpp} (83%) rename dbms/src/Processors/Executors/{TreeExecutor.h => TreeExecutorBlockInputStream.h} (86%) diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.cpp b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp index 8886ca4b2b8..532c693bd47 100644 --- a/dbms/src/DataStreams/ExecutionSpeedLimits.cpp +++ b/dbms/src/DataStreams/ExecutionSpeedLimits.cpp @@ -37,10 +37,13 @@ static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_i } } -void ExecutionSpeedLimits::throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds) +void ExecutionSpeedLimits::throttle( + size_t read_rows, size_t read_bytes, + size_t total_rows_to_read, UInt64 total_elapsed_microseconds) { - if ((min_execution_speed || max_execution_speed || min_execution_speed_bytes || - max_execution_speed_bytes || (total_rows && timeout_before_checking_execution_speed != 0)) && + if ((min_execution_rps != 0 || max_execution_rps != 0 + || min_execution_bps != 0 || max_execution_bps != 0 + || (total_rows_to_read != 0 && timeout_before_checking_execution_speed != 0)) && (static_cast(total_elapsed_microseconds) > timeout_before_checking_execution_speed.totalMicroseconds())) { /// Do not count sleeps in throttlers @@ -52,33 +55,35 @@ void ExecutionSpeedLimits::throttle(size_t read_rows, size_t read_bytes, size_t if (elapsed_seconds > 0) { - if (min_execution_speed && read_rows / elapsed_seconds < min_execution_speed) + auto rows_per_second = read_rows / elapsed_seconds; + if (min_execution_rps && rows_per_second < min_execution_rps) throw Exception("Query is executing too slow: " + toString(read_rows / elapsed_seconds) - + " rows/sec., minimum: " + toString(min_execution_speed), + + " rows/sec., minimum: " + toString(min_execution_rps), ErrorCodes::TOO_SLOW); - if (min_execution_speed_bytes && read_bytes / elapsed_seconds < min_execution_speed_bytes) + auto bytes_per_second = read_bytes / elapsed_seconds; + if (min_execution_bps && bytes_per_second < min_execution_bps) throw Exception("Query is executing too slow: " + toString(read_bytes / elapsed_seconds) - + " bytes/sec., minimum: " + toString(min_execution_speed_bytes), + + " bytes/sec., minimum: " + toString(min_execution_bps), ErrorCodes::TOO_SLOW); /// If the predicted execution time is longer than `max_execution_time`. 
- if (max_execution_time != 0 && total_rows && read_rows) + if (max_execution_time != 0 && total_rows_to_read && read_rows) { - double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows) / read_rows); + double estimated_execution_time_seconds = elapsed_seconds * (static_cast(total_rows_to_read) / read_rows); if (estimated_execution_time_seconds > max_execution_time.totalSeconds()) throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)" + " is too long. Maximum: " + toString(max_execution_time.totalSeconds()) - + ". Estimated rows to process: " + toString(total_rows), + + ". Estimated rows to process: " + toString(total_rows_to_read), ErrorCodes::TOO_SLOW); } - if (max_execution_speed && read_rows / elapsed_seconds >= max_execution_speed) - limitProgressingSpeed(read_rows, max_execution_speed, total_elapsed_microseconds); + if (max_execution_rps && rows_per_second >= max_execution_rps) + limitProgressingSpeed(read_rows, max_execution_rps, total_elapsed_microseconds); - if (max_execution_speed_bytes && read_bytes / elapsed_seconds >= max_execution_speed_bytes) - limitProgressingSpeed(read_bytes, max_execution_speed_bytes, total_elapsed_microseconds); + if (max_execution_bps && bytes_per_second >= max_execution_bps) + limitProgressingSpeed(read_bytes, max_execution_bps, total_elapsed_microseconds); } } } diff --git a/dbms/src/DataStreams/ExecutionSpeedLimits.h b/dbms/src/DataStreams/ExecutionSpeedLimits.h index 6dbc2e5c687..a067fc86000 100644 --- a/dbms/src/DataStreams/ExecutionSpeedLimits.h +++ b/dbms/src/DataStreams/ExecutionSpeedLimits.h @@ -7,21 +7,22 @@ namespace DB { /// Limits for query execution speed. -/// In rows per second. class ExecutionSpeedLimits { public: - size_t min_execution_speed = 0; - size_t max_execution_speed = 0; - size_t min_execution_speed_bytes = 0; - size_t max_execution_speed_bytes = 0; + /// For rows per second. + size_t min_execution_rps = 0; + size_t max_execution_rps = 0; + /// For bytes per second. + size_t min_execution_bps = 0; + size_t max_execution_bps = 0; Poco::Timespan max_execution_time = 0; /// Verify that the speed is not too low after the specified time has elapsed. Poco::Timespan timeout_before_checking_execution_speed = 0; /// Pause execution in case if speed limits were exceeded. 
- void throttle(size_t read_rows, size_t read_bytes, size_t total_rows, UInt64 total_elapsed_microseconds); + void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds); }; } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index fe41080d033..9c6334981e3 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -1568,10 +1568,10 @@ void InterpreterSelectQuery::executeFetchColumns( */ if (options.to_stage == QueryProcessingStage::Complete) { - limits.speed_limits.min_execution_speed = settings.min_execution_speed; - limits.speed_limits.max_execution_speed = settings.max_execution_speed; - limits.speed_limits.min_execution_speed_bytes = settings.min_execution_speed_bytes; - limits.speed_limits.max_execution_speed_bytes = settings.max_execution_speed_bytes; + limits.speed_limits.min_execution_rps = settings.min_execution_speed; + limits.speed_limits.max_execution_rps = settings.max_execution_speed; + limits.speed_limits.min_execution_bps = settings.min_execution_speed_bytes; + limits.speed_limits.max_execution_bps = settings.max_execution_speed_bytes; limits.speed_limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed; } diff --git a/dbms/src/Processors/Chunk.cpp b/dbms/src/Processors/Chunk.cpp index 253eb1e91b1..4be0502f604 100644 --- a/dbms/src/Processors/Chunk.cpp +++ b/dbms/src/Processors/Chunk.cpp @@ -97,6 +97,15 @@ Columns Chunk::detachColumns() return std::move(columns); } +void Chunk::addColumn(ColumnPtr column) +{ + if (column->size() != num_rows) + throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + + columns.emplace_back(std::move(column)) +} + void Chunk::erase(size_t position) { if (columns.empty()) diff --git a/dbms/src/Processors/Chunk.h b/dbms/src/Processors/Chunk.h index 7e33d8cf1c0..28e9bde56e5 100644 --- a/dbms/src/Processors/Chunk.h +++ b/dbms/src/Processors/Chunk.h @@ -72,11 +72,12 @@ public: UInt64 getNumRows() const { return num_rows; } UInt64 getNumColumns() const { return columns.size(); } - bool hasNoRows() const { return num_rows == 0; } - bool hasNoColumns() const { return columns.empty(); } - bool empty() const { return hasNoRows() && hasNoColumns(); } + bool hasRows() const { return num_rows > 0; } + bool hasColumns() const { return !columns.empty(); } + bool empty() const { return !hasRows() && !hasColumns(); } operator bool() const { return !empty(); } + void addColumn(ColumnPtr column); void erase(size_t position); UInt64 bytes() const; diff --git a/dbms/src/Processors/Executors/TreeExecutor.cpp b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp similarity index 83% rename from dbms/src/Processors/Executors/TreeExecutor.cpp rename to dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp index b53b32455ab..5d632bdcef5 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.cpp +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -9,7 +9,7 @@ static void checkProcessorHasSingleOutput(IProcessor * processor) { size_t num_outputs = processor->getOutputs().size(); if (num_outputs != 1) - throw Exception("All processors in TreeExecutor must have single output, " + throw Exception("All processors in 
TreeExecutorBlockInputStream must have single output, " "but processor with name " + processor->getName() + " has " + std::to_string(num_outputs), ErrorCodes::LOGICAL_ERROR); } @@ -25,7 +25,7 @@ static void validateTree(const Processors & processors, IProcessor * root, std:: bool is_inserted = index.try_emplace(processor.get(), index.size()).second; if (!is_inserted) - throw Exception("Duplicate processor in TreeExecutor with name " + processor->getName(), + throw Exception("Duplicate processor in TreeExecutorBlockInputStream with name " + processor->getName(), ErrorCodes::LOGICAL_ERROR); } @@ -43,13 +43,13 @@ static void validateTree(const Processors & processors, IProcessor * root, std:: if (it == index.end()) throw Exception("Processor with name " + node->getName() + " " - "was not mentioned in list passed to TreeExecutor, " + "was not mentioned in list passed to TreeExecutorBlockInputStream, " "but was traversed to from other processors.", ErrorCodes::LOGICAL_ERROR); size_t position = it->second; if (is_visited[position]) - throw Exception("Processor with name " + node->getName() + " was visited twice while traverse in TreeExecutor. " + throw Exception("Processor with name " + node->getName() + " was visited twice while traverse in TreeExecutorBlockInputStream. " "Passed processors are not tree.", ErrorCodes::LOGICAL_ERROR); is_visited[position] = true; @@ -71,13 +71,13 @@ static void validateTree(const Processors & processors, IProcessor * root, std:: for (size_t i = 0; i < is_visited.size(); ++i) if (!is_visited[i]) throw Exception("Processor with name " + processors[i]->getName() + - " was not visited by traverse in TreeExecutor.", ErrorCodes::LOGICAL_ERROR); + " was not visited by traverse in TreeExecutorBlockInputStream.", ErrorCodes::LOGICAL_ERROR); } -void TreeExecutor::init() +void TreeExecutorBlockInputStream::init() { if (processors.empty()) - throw Exception("No processors were passed to TreeExecutor.", ErrorCodes::LOGICAL_ERROR); + throw Exception("No processors were passed to TreeExecutorBlockInputStream.", ErrorCodes::LOGICAL_ERROR); root = &output_port.getProcessor(); @@ -88,7 +88,7 @@ void TreeExecutor::init() input_port->setNeeded(); } -void TreeExecutor::execute() +void TreeExecutorBlockInputStream::execute() { std::stack stack; stack.push(root); @@ -120,7 +120,7 @@ void TreeExecutor::execute() if (inputs.empty()) throw Exception("Processors " + node->getName() + " with empty input " - "has returned NeedData in TreeExecutor", ErrorCodes::LOGICAL_ERROR); + "has returned NeedData in TreeExecutorBlockInputStream", ErrorCodes::LOGICAL_ERROR); bool all_finished = true; @@ -135,7 +135,7 @@ void TreeExecutor::execute() } if (all_finished) - throw Exception("Processors " + node->getName() + " has returned NeedData in TreeExecutor, " + throw Exception("Processors " + node->getName() + " has returned NeedData in TreeExecutorBlockInputStream, " "but all it's inputs are finished.", ErrorCodes::LOGICAL_ERROR); break; } @@ -160,13 +160,13 @@ void TreeExecutor::execute() { throw Exception("Processor with name " + node->getName() + " " "returned status " + IProcessor::statusToName(status) + " " - "which is not supported in TreeExecutor.", ErrorCodes::LOGICAL_ERROR); + "which is not supported in TreeExecutorBlockInputStream.", ErrorCodes::LOGICAL_ERROR); } } } } -Block TreeExecutor::readImpl() +Block TreeExecutorBlockInputStream::readImpl() { while (true) { @@ -180,31 +180,31 @@ Block TreeExecutor::readImpl() } } -void TreeExecutor::setProgressCallback(const ProgressCallback & 
callback) +void TreeExecutorBlockInputStream::setProgressCallback(const ProgressCallback & callback) { for (auto & source : sources_with_progress) source->setProgressCallback(callback); } -void TreeExecutor::setProcessListElement(QueryStatus * elem) +void TreeExecutorBlockInputStream::setProcessListElement(QueryStatus * elem) { for (auto & source : sources_with_progress) source->setProcessListElement(elem); } -void TreeExecutor::setLimits(const IBlockInputStream::LocalLimits & limits_) +void TreeExecutorBlockInputStream::setLimits(const IBlockInputStream::LocalLimits & limits_) { for (auto & source : sources_with_progress) source->setLimits(limits_); } -void TreeExecutor::setQuota(QuotaForIntervals & quota_) +void TreeExecutorBlockInputStream::setQuota(QuotaForIntervals & quota_) { for (auto & source : sources_with_progress) source->setQuota(quota_); } -void TreeExecutor::addTotalRowsApprox(size_t value) +void TreeExecutorBlockInputStream::addTotalRowsApprox(size_t value) { /// Add only for one source. if (!sources_with_progress.empty()) diff --git a/dbms/src/Processors/Executors/TreeExecutor.h b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h similarity index 86% rename from dbms/src/Processors/Executors/TreeExecutor.h rename to dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h index d4817d6c99b..da1d60dd972 100644 --- a/dbms/src/Processors/Executors/TreeExecutor.h +++ b/dbms/src/Processors/Executors/TreeExecutorBlockInputStream.h @@ -9,8 +9,8 @@ class ISourceWithProgress; /// It's a wrapper from processors tree-shaped pipeline to block input stream. /// Execute all processors in a single thread, by in-order tree traverse. -/// Also, support fro progress and quotas. -class TreeExecutor : public IBlockInputStream +/// Also, support for progress and quotas. +class TreeExecutorBlockInputStream : public IBlockInputStream { public: /// Last processor in list must be a tree root. 
@@ -18,7 +18,7 @@ public: /// * processors form a tree /// * all processors are attainable from root /// * there is no other connected processors - explicit TreeExecutor(Pipe pipe) : output_port(pipe.getPort()), processors(std::move(pipe).detachProcessors()) + explicit TreeExecutorBlockInputStream(Pipe pipe) : output_port(pipe.getPort()), processors(std::move(pipe).detachProcessors()) { init(); } diff --git a/dbms/src/Processors/Transforms/AddingConstColumnTransform.h b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h index aea9ee392b5..26d70d27ca7 100644 --- a/dbms/src/Processors/Transforms/AddingConstColumnTransform.h +++ b/dbms/src/Processors/Transforms/AddingConstColumnTransform.h @@ -19,11 +19,7 @@ protected: void transform(Chunk & chunk) override { auto num_rows = chunk.getNumRows(); - auto columns = chunk.detachColumns(); - - columns.emplace_back(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); - - chunk.setColumns(std::move(columns), num_rows); + chunk.addColumn(data_type->createColumnConst(num_rows, value)->convertToFullColumnIfConst()); } private: diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp index e37eae82de1..3a9cbe23873 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp @@ -176,7 +176,7 @@ IProcessor::Status MergingSortedTransform::prepare() return Status::NeedData; auto chunk = input.pull(); - if (chunk.hasNoRows()) + if (!chunk.hasRows()) return Status::NeedData; updateCursor(std::move(chunk), next_input_to_read); diff --git a/dbms/src/Storages/IStorage.cpp b/dbms/src/Storages/IStorage.cpp index c271f69090a..7a2960a1335 100644 --- a/dbms/src/Storages/IStorage.cpp +++ b/dbms/src/Storages/IStorage.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -440,7 +440,7 @@ BlockInputStreams IStorage::read( res.reserve(pipes.size()); for (auto & pipe : pipes) - res.emplace_back(std::make_shared(std::move(pipe))); + res.emplace_back(std::make_shared(std::move(pipe))); return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 17c5e4609c7..44a4e939565 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -59,7 +59,7 @@ Chunk MergeTreeBaseSelectProcessor::generate() auto res = readFromPart(); - if (!res.hasNoRows()) + if (res.hasRows()) { injectVirtualColumns(res, task.get(), virt_column_names); return res; @@ -231,7 +231,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virtual_column_name); + callback.template operator()(column, virtual_column_name); } else if (virtual_column_name == "_part_index") { @@ -241,7 +241,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeUInt64().createColumn(); - callback.template insert(column, virtual_column_name); + callback.template operator()(column, virtual_column_name); } else if (virtual_column_name == "_partition_id") { @@ -251,7 +251,7 @@ static void injectVirtualColumnsImpl(size_t rows, InsertCallback & callback, Mer else column = DataTypeString().createColumn(); - callback.template insert(column, virtual_column_name); + callback.template operator()(column, 
virtual_column_name); } } } @@ -262,7 +262,7 @@ namespace struct InsertIntoBlockCallback { template - void insert(const ColumnPtr & column, const String & name) + void operator()(const ColumnPtr & column, const String & name) { block.insert({column, std::make_shared(), name}); } @@ -273,7 +273,7 @@ namespace struct InsertIntoColumnsCallback { template - void insert(const ColumnPtr & column, const String &) + void operator()(const ColumnPtr & column, const String &) { columns.push_back(column); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 80c8b337536..42648bce692 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -58,7 +58,7 @@ namespace std #include #include #include -#include +#include #include namespace ProfileEvents @@ -1103,7 +1103,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (size_t i = 0; i < sort_columns_size; ++i) sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1); - auto streams_to_merge = [&]() + auto streams_to_merge = [&pipes]() { size_t num_streams = pipes.size(); @@ -1111,7 +1111,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( streams.reserve(num_streams); for (size_t i = 0; i < num_streams; ++i) - streams.emplace_back(std::make_shared(std::move(pipes[i]))); + streams.emplace_back(std::make_shared(std::move(pipes[i]))); pipes.clear(); return streams; diff --git a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 37a3b931fa8..506a8cc3298 100644 --- a/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -6,7 +6,7 @@ #include #include -#include +#include namespace DB @@ -36,7 +36,7 @@ public: streams.reserve(pipes.size()); for (auto & pipe : pipes) - streams.emplace_back(std::make_shared(std::move(pipe))); + streams.emplace_back(std::make_shared(std::move(pipe))); return streams; } From f024b007a2e9d04721121e61028234a0f19545ad Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 21 Oct 2019 20:57:26 +0300 Subject: [PATCH 049/222] default_merge_block_size setting --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 18 +++++++++++------- .../src/Storages/MergeTree/MergeTreeSettings.h | 1 + 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index df3720359d3..344210c348c 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -683,42 +683,46 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// If merge is vertical we cannot calculate it bool blocks_are_granules_size = (merge_alg == MergeAlgorithm::Vertical); + UInt64 merge_block_size = data_settings->default_merge_block_size; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: merged_stream = std::make_unique( - src_streams, sort_description, DEFAULT_MERGE_BLOCK_SIZE, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size); + src_streams, sort_description, merge_block_size, 0, rows_sources_write_buf.get(), true, blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing: merged_stream = std::make_unique( - 
src_streams, sort_description, data.merging_params.sign_column, DEFAULT_MERGE_BLOCK_SIZE, rows_sources_write_buf.get(), blocks_are_granules_size); + src_streams, sort_description, data.merging_params.sign_column, + merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; case MergeTreeData::MergingParams::Summing: merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.columns_to_sum, DEFAULT_MERGE_BLOCK_SIZE); + src_streams, sort_description, data.merging_params.columns_to_sum, merge_block_size); break; case MergeTreeData::MergingParams::Aggregating: merged_stream = std::make_unique( - src_streams, sort_description, DEFAULT_MERGE_BLOCK_SIZE); + src_streams, sort_description, merge_block_size); break; case MergeTreeData::MergingParams::Replacing: merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.version_column, DEFAULT_MERGE_BLOCK_SIZE, rows_sources_write_buf.get(), blocks_are_granules_size); + src_streams, sort_description, data.merging_params.version_column, + merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; case MergeTreeData::MergingParams::Graphite: merged_stream = std::make_unique( - src_streams, sort_description, DEFAULT_MERGE_BLOCK_SIZE, + src_streams, sort_description, merge_block_size, data.merging_params.graphite_params, time_of_merge); break; case MergeTreeData::MergingParams::VersionedCollapsing: merged_stream = std::make_unique( - src_streams, sort_description, data.merging_params.sign_column, DEFAULT_MERGE_BLOCK_SIZE, rows_sources_write_buf.get(), blocks_are_granules_size); + src_streams, sort_description, data.merging_params.sign_column, + merge_block_size, rows_sources_write_buf.get(), blocks_are_granules_size); break; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 3652718451f..1e796415254 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -29,6 +29,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ \ /** Merge settings. */ \ + M(SettingUInt64, default_merge_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.") \ M(SettingUInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.") \ From e7ba48ee84acb52660a9a44d942738e2e140aca2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 22 Oct 2019 02:49:44 +0300 Subject: [PATCH 050/222] Fix build. 
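The break is a missing semicolon in the new `Chunk::addColumn` introduced by the review-fixes commit; the hunk follows. For context, a minimal sketch of the call pattern that method enables (the constant value and sizes here are illustrative; `AddingConstColumnTransform` is the real in-tree caller):

```cpp
/// Append one more column to an existing chunk. addColumn() verifies that
/// the column height matches getNumRows() and throws LOGICAL_ERROR otherwise.
Columns columns;               /// existing columns, all of height num_rows
UInt64 num_rows = 8192;        /// illustrative
Chunk chunk(std::move(columns), num_rows);

auto extra = DataTypeUInt64().createColumnConst(num_rows, 42u)
                 ->convertToFullColumnIfConst();   /// Chunk stores full columns
chunk.addColumn(std::move(extra));
```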
--- dbms/src/Processors/Chunk.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Chunk.cpp b/dbms/src/Processors/Chunk.cpp index 4be0502f604..d9d0574d3b8 100644 --- a/dbms/src/Processors/Chunk.cpp +++ b/dbms/src/Processors/Chunk.cpp @@ -103,7 +103,7 @@ void Chunk::addColumn(ColumnPtr column) throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); - columns.emplace_back(std::move(column)) + columns.emplace_back(std::move(column)); } void Chunk::erase(size_t position) From dad1e397e2ad628be4b9e7d2f8f98281b6196b16 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 22 Oct 2019 02:51:36 +0300 Subject: [PATCH 051/222] Fix build. --- dbms/src/Processors/Transforms/MergingSortedTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp index 3a9cbe23873..d8f06a7fe4a 100644 --- a/dbms/src/Processors/Transforms/MergingSortedTransform.cpp +++ b/dbms/src/Processors/Transforms/MergingSortedTransform.cpp @@ -132,7 +132,7 @@ IProcessor::Status MergingSortedTransform::prepare() } auto chunk = input.pull(); - if (chunk.hasNoRows()) + if (!chunk.hasRows()) { all_inputs_has_data = false; continue; From 645bcdacaed05b511d2cce9ac1135adab5460289 Mon Sep 17 00:00:00 2001 From: memo Date: Tue, 22 Oct 2019 10:23:07 +0800 Subject: [PATCH 052/222] add docs and simplify the code --- dbms/src/Functions/array/arrayCompact.cpp | 38 +------------------ .../functions/array_functions.md | 19 +++++++++- 2 files changed, 20 insertions(+), 37 deletions(-) diff --git a/dbms/src/Functions/array/arrayCompact.cpp b/dbms/src/Functions/array/arrayCompact.cpp index 0775c4cb7bb..bd3d8fa303e 100644 --- a/dbms/src/Functions/array/arrayCompact.cpp +++ b/dbms/src/Functions/array/arrayCompact.cpp @@ -3,7 +3,6 @@ #include "FunctionArrayMapped.h" #include - namespace DB { /// arrayCompact(['a', 'a', 'b', 'b', 'a']) = ['a', 'b', 'a'] - compact arrays @@ -14,6 +13,7 @@ namespace DB struct ArrayCompactImpl { + static bool useDefaultImplementationForConstants() { return true; } static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } @@ -41,40 +41,7 @@ namespace DB const ColumnVector * column = checkAndGetColumn>(&*mapped); if (!column) - { - const ColumnConst * column_const = checkAndGetColumnConst>(&*mapped); - - if (!column_const) - return false; - - const Element x = column_const->template getValue(); - const IColumn::Offsets & offsets = array.getOffsets(); - auto column_data = ColumnVector::create(column_const->size()); - typename ColumnVector::Container & res_values = column_data->getData(); - auto column_offsets = ColumnArray::ColumnOffsets::create(offsets.size()); - IColumn::Offsets & res_offsets = column_offsets->getData(); - - size_t res_pos = 0; - size_t pos = 0; - for (size_t i = 0; i < offsets.size(); ++i) - { - if (pos < offsets[i]) - { - res_values[res_pos] = x; - for (++pos, ++res_pos; pos < offsets[i]; ++pos) - { - res_values[res_pos++] = x; - } - } - res_offsets[i] = res_pos; - } - for (size_t i = 0; i < column_data->size() - res_pos; ++i) - { - res_values.pop_back(); - } - res_ptr = ColumnArray::create(std::move(column_data), std::move(column_offsets)); - return true; - } + return false; const IColumn::Offsets & offsets = array.getOffsets(); 
             const typename ColumnVector::Container & data = column->getData();
@@ -138,4 +105,3 @@ namespace DB
 }

 }
-
diff --git a/docs/en/query_language/functions/array_functions.md b/docs/en/query_language/functions/array_functions.md
index a43f975254f..5da4f939713 100644
--- a/docs/en/query_language/functions/array_functions.md
+++ b/docs/en/query_language/functions/array_functions.md
@@ -789,5 +789,22 @@ SELECT arrayReverse([1, 2, 3])

 Synonym for ["arrayReverse"](#array_functions-arrayreverse)

-[Original article](https://clickhouse.yandex/docs/en/query_language/functions/array_functions/)
+
+## arrayCompact(arr) {#array_functions-arraycompact}
+
+Takes an array and removes consecutive duplicate elements: of each run of equal adjacent elements, only the first one is kept.
+
+Example:
+
+```sql
+SELECT arrayCompact([1, 2, 2, 3, 2, 3, 3])
+```
+
+```text
+┌─arrayCompact([1, 2, 2, 3, 2, 3, 3])─┐
+│ [1,2,3,2,3]                          │
+└─────────────────────────────────────┘
+```
+
+## 
\ No newline at end of file

From 640da3f51268d89d32915a35c22744dd94f26756 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 22 Oct 2019 16:59:13 +0300
Subject: [PATCH 053/222] Try to fix AggregateFunctionGroupBitmap.

---
 dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
index 65a450bfbaf..6479eaf3c1f 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupBitmap.h
@@ -90,6 +90,10 @@ public:
     {
         Data & data_lhs = this->data(place);
         const Data & data_rhs = this->data(rhs);
+
+        if (!data_rhs.doneFirst)
+            return;
+
         if (!data_lhs.doneFirst)
         {
             data_lhs.doneFirst = true;

From bcc4c2f0af20fd95529c74ae1bd1af717629ea69 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 22 Oct 2019 17:11:29 +0300
Subject: [PATCH 054/222] Disable processors by default.

---
 dbms/src/Core/Settings.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 8e21ff830f1..30752113a6b 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -360,7 +360,7 @@ struct Settings : public SettingsCollection
     M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only for 'mysql' table function.") \
     M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \
     \
-    M(SettingBool, experimental_use_processors, true, "Use processors pipeline.") \
+    M(SettingBool, experimental_use_processors, false, "Use processors pipeline.") \
     \
     M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \
     M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \
@@ -381,7 +381,7 @@ struct Settings : public SettingsCollection
     \
     /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
     \
-    M(SettingBool, allow_experimental_low_cardinality_type, false, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \
+    M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") \
     M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13") \

 DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
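Regarding the `doneFirst` guard in patch 053 above: a right-hand aggregation state that never received a value must contribute nothing to the merge. A condensed, self-contained sketch of the resulting control flow (`State` and `Bitmap` are toy stand-ins for `AggregateFunctionGroupBitmapData`, not the real types):

```cpp
struct Bitmap { void merge(const Bitmap &) { /* toy stand-in */ } };

struct State
{
    bool done_first = false;   /// has this state received at least one value?
    Bitmap bits;
};

void merge(State & lhs, const State & rhs)
{
    if (!rhs.done_first)
        return;                    /// rhs is empty: keep lhs untouched (the new guard)
    if (!lhs.done_first)
    {
        lhs.done_first = true;
        lhs.bits = rhs.bits;       /// lhs was empty: adopt the rhs state
    }
    else
        lhs.bits.merge(rhs.bits);  /// both initialized: combine the bitmaps
}
```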
From 9abab40512e93d6c7f6eb15ba7bf23411f4cdaa7 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Wed, 23 Oct 2019 06:45:43 +0300
Subject: [PATCH 055/222] Added more comments.

---
 dbms/src/Processors/Pipe.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/Processors/Pipe.h b/dbms/src/Processors/Pipe.h
index 55b397c82d6..872f04c339b 100644
--- a/dbms/src/Processors/Pipe.h
+++ b/dbms/src/Processors/Pipe.h
@@ -11,7 +11,11 @@ using Pipes = std::vector;
 class Pipe
 {
 public:
+    /// Create from source. It must have no input ports and single output.
     explicit Pipe(ProcessorPtr source);
+    /// Connect several pipes together with specified transform.
+    /// Transform must have the number of inputs equals to the number of pipes. And single output.
+    /// Will connect pipes outputs with transform inputs automatically.
     Pipe(Pipes && pipes, ProcessorPtr transform);

     Pipe(const Pipe & other) = delete;

From 081e9d95544a6d264c52f922946823bf51499094 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 23 Oct 2019 12:59:57 +0300
Subject: [PATCH 056/222] Fixed links.

---
 docs/en/operations/system_tables.md | 5 +++++
 docs/en/query_language/alter.md     | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/docs/en/operations/system_tables.md b/docs/en/operations/system_tables.md
index 47bbf0266ac..de0c277f100 100644
--- a/docs/en/operations/system_tables.md
+++ b/docs/en/operations/system_tables.md
@@ -757,4 +757,9 @@ If there were problems with mutating some parts, the following columns contain a

 **latest_fail_reason** - The exception message that caused the most recent part mutation failure.

+
+## system.disks {#system_tables-disks}
+
+## system.storage_policies {#system_tables-storage_policies}
+
 [Original article](https://clickhouse.yandex/docs/en/operations/system_tables/)
diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md
index 5c1d6331add..b7b37924c71 100644
--- a/docs/en/query_language/alter.md
+++ b/docs/en/query_language/alter.md
@@ -355,6 +355,8 @@ Before downloading, the system checks if the partition exists and the table stru

 Although the query is called `ALTER TABLE`, it does not change the table structure and does not immediately change the data available in the table.

+#### MOVE PARTITION|PART {#alter_move-partition}
+
 #### How To Set Partition Expression {#alter-how-to-specify-part-expr}

 You can specify the partition expression in `ALTER ...
PARTITION` queries in different ways: From 9818eada694ce85ad3837a42e53e284bd08c3c7d Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 24 Oct 2019 02:18:21 +0300 Subject: [PATCH 057/222] rename: merge_max_block_size --- dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeSettings.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 344210c348c..c44aee7e842 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -683,7 +683,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor /// If merge is vertical we cannot calculate it bool blocks_are_granules_size = (merge_alg == MergeAlgorithm::Vertical); - UInt64 merge_block_size = data_settings->default_merge_block_size; + UInt64 merge_block_size = data_settings->merge_max_block_size; switch (data.merging_params.mode) { case MergeTreeData::MergingParams::Ordinary: diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 1e796415254..6db22063841 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -29,7 +29,7 @@ struct MergeTreeSettings : public SettingsCollection M(SettingUInt64, index_granularity, 8192, "How many rows correspond to one primary key value.") \ \ /** Merge settings. */ \ - M(SettingUInt64, default_merge_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.") \ + M(SettingUInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.") \ M(SettingUInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).") \ M(SettingUInt64, max_replicated_merges_in_queue, 16, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.") \ From 3767cb76de3aaf57d56502e152f53d167fc7c242 Mon Sep 17 00:00:00 2001 From: hcz Date: Fri, 25 Oct 2019 11:25:02 +0800 Subject: [PATCH 058/222] Improve style --- .../AggregateFunctionMinMaxAny.h | 8 ++-- .../AggregateFunctionQuantile.cpp | 16 ++++---- .../src/AggregateFunctions/ReservoirSampler.h | 2 +- dbms/src/Functions/FunctionBinaryArithmetic.h | 4 +- .../Functions/FunctionsStringSimilarity.cpp | 38 +++++++++---------- dbms/src/Functions/Regexps.h | 8 ++-- dbms/src/Functions/array/arrayFill.cpp | 8 ++-- dbms/src/Functions/array/arraySort.cpp | 2 +- dbms/src/Functions/array/arraySplit.cpp | 12 +++--- dbms/src/Functions/formatString.h | 14 +++---- .../registerFunctionsHigherOrder.cpp | 12 +++--- 11 files changed, 62 insertions(+), 62 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 00869c846d4..db2978db6a0 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -673,15 +673,15 @@ struct AggregateFunctionAnyHeavyData 
: Data }; -template -class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper> +template +class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper> { private: DataTypePtr & type; public: AggregateFunctionsSingleValue(const DataTypePtr & type_) - : IAggregateFunctionDataHelper>({type_}, {}) + : IAggregateFunctionDataHelper>({type_}, {}) , type(this->argument_types[0]) { if (StringRef(Data::name()) == StringRef("min") @@ -722,7 +722,7 @@ public: bool allocatesMemoryInArena() const override { - return AllocatesMemoryInArena; + return use_arena; } void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override diff --git a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp index 2439120d169..d96bb82d6f5 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -16,11 +16,11 @@ namespace ErrorCodes namespace { -template using FuncQuantile = AggregateFunctionQuantile, NameQuantile, false, std::conditional_t, false>; -template using FuncQuantiles = AggregateFunctionQuantile, NameQuantiles, false, std::conditional_t, true>; +template using FuncQuantile = AggregateFunctionQuantile, NameQuantile, false, std::conditional_t, false>; +template using FuncQuantiles = AggregateFunctionQuantile, NameQuantiles, false, std::conditional_t, true>; -template using FuncQuantileDeterministic = AggregateFunctionQuantile, NameQuantileDeterministic, true, std::conditional_t, false>; -template using FuncQuantilesDeterministic = AggregateFunctionQuantile, NameQuantilesDeterministic, true, std::conditional_t, true>; +template using FuncQuantileDeterministic = AggregateFunctionQuantile, NameQuantileDeterministic, true, std::conditional_t, false>; +template using FuncQuantilesDeterministic = AggregateFunctionQuantile, NameQuantilesDeterministic, true, std::conditional_t, true>; template using FuncQuantileExact = AggregateFunctionQuantile, NameQuantileExact, false, void, false>; template using FuncQuantilesExact = AggregateFunctionQuantile, NameQuantilesExact, false, void, true>; @@ -40,11 +40,11 @@ template using FuncQuantilesTiming = AggregateFunctionQ template using FuncQuantileTimingWeighted = AggregateFunctionQuantile, NameQuantileTimingWeighted, true, Float32, false>; template using FuncQuantilesTimingWeighted = AggregateFunctionQuantile, NameQuantilesTimingWeighted, true, Float32, true>; -template using FuncQuantileTDigest = AggregateFunctionQuantile, NameQuantileTDigest, false, std::conditional_t, false>; -template using FuncQuantilesTDigest = AggregateFunctionQuantile, NameQuantilesTDigest, false, std::conditional_t, true>; +template using FuncQuantileTDigest = AggregateFunctionQuantile, NameQuantileTDigest, false, std::conditional_t, false>; +template using FuncQuantilesTDigest = AggregateFunctionQuantile, NameQuantilesTDigest, false, std::conditional_t, true>; -template using FuncQuantileTDigestWeighted = AggregateFunctionQuantile, NameQuantileTDigestWeighted, true, std::conditional_t, false>; -template using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile, NameQuantilesTDigestWeighted, true, std::conditional_t, true>; +template using FuncQuantileTDigestWeighted = AggregateFunctionQuantile, NameQuantileTDigestWeighted, true, std::conditional_t, false>; +template using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile, NameQuantilesTDigestWeighted, true, std::conditional_t, true>; template