From 22f48528304afe15b5b3c72e8dce673ba7b5f7d5 Mon Sep 17 00:00:00 2001 From: Vladislav Smirnov Date: Wed, 13 Mar 2019 13:50:49 +0300 Subject: [PATCH 01/78] fix reading capnproto --- dbms/src/Formats/CapnProtoRowInputStream.cpp | 52 ++++++++++++++++---- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 692d1ec8dee..d0bea371d10 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -213,21 +213,47 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) return false; // Read from underlying buffer directly - auto buf = istr.buffer(); + auto& buf = istr.buffer(); auto base = reinterpret_cast(istr.position()); // Check if there's enough bytes in the buffer to read the full message kj::Array heap_array; - auto array = kj::arrayPtr(base, buf.size() - istr.offset()); - auto expected_words = capnp::expectedSizeInWordsFromPrefix(array); - if (expected_words * sizeof(capnp::word) > array.size()) - { - // We'll need to reassemble the message in a contiguous buffer - heap_array = kj::heapArray(expected_words); - istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size()); - array = heap_array.asPtr(); - } + kj::ArrayPtr array; + size_t bytesSize = buf.size() - istr.offset(); + size_t remainingBytes = 0; + if (bytesSize < sizeof(capnp::word)) // case when we read less than 8 bytes (capnp::word) + { + char edgeBytes[sizeof(capnp::word)]; + while (bytesSize + remainingBytes < sizeof(capnp::word)) + { + istr.readStrict(edgeBytes + remainingBytes, bytesSize); + remainingBytes += bytesSize; + istr.next(); + bytesSize = buf.size(); + } + + auto wordsSize = bytesSize / sizeof(capnp::word) + 1; + heap_array = kj::heapArray(wordsSize + 1); + auto chars_heap_array = heap_array.asChars(); + ::memcpy(chars_heap_array.begin(), edgeBytes, remainingBytes); + 
::memcpy(chars_heap_array.begin() + remainingBytes, buf.begin(), buf.size()); + array = heap_array.asPtr(); + } + else + { + auto wordsSize = bytesSize / sizeof(capnp::word); + + array = kj::arrayPtr(base, wordsSize); + auto expected_words = capnp::expectedSizeInWordsFromPrefix(array); + if (expected_words * sizeof(capnp::word) > array.size()) + { + // We'll need to reassemble the message in a contiguous buffer + heap_array = kj::heapArray(expected_words); + istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size()); + array = heap_array.asPtr(); + } + } #if CAPNP_VERSION >= 8000 capnp::UnalignedFlatArrayMessageReader msg(array); @@ -287,6 +313,12 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) auto parsed = (msg.getEnd() - base) * sizeof(capnp::word); istr.position() += parsed; } + // Advance buffer position if used with remaining bytes from previous buffer + else if (remainingBytes != 0) + { + auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remainingBytes; + istr.position() += parsed; + } return true; } From 7d822d80813c8e7beb6cb462d17bd9a821565f5c Mon Sep 17 00:00:00 2001 From: Vladislav Smirnov Date: Wed, 13 Mar 2019 14:05:52 +0300 Subject: [PATCH 02/78] tabs -> spaces --- dbms/src/Formats/CapnProtoRowInputStream.cpp | 67 ++++++++++---------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index d0bea371d10..c84442e83f5 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -221,39 +221,38 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) kj::ArrayPtr array; size_t bytesSize = buf.size() - istr.offset(); - size_t remainingBytes = 0; - if (bytesSize < sizeof(capnp::word)) // case when we read less than 8 bytes (capnp::word) - { - char edgeBytes[sizeof(capnp::word)]; - while (bytesSize + 
remainingBytes < sizeof(capnp::word)) - { - istr.readStrict(edgeBytes + remainingBytes, bytesSize); - remainingBytes += bytesSize; - istr.next(); - bytesSize = buf.size(); - } + size_t remainingBytes = 0; + if (bytesSize < sizeof(capnp::word)) // case when we read less than 8 bytes (capnp::word) + { + char edgeBytes[sizeof(capnp::word)]; + while (bytesSize + remainingBytes < sizeof(capnp::word)) + { + istr.readStrict(edgeBytes + remainingBytes, bytesSize); + remainingBytes += bytesSize; + istr.next(); + bytesSize = buf.size(); + } - auto wordsSize = bytesSize / sizeof(capnp::word) + 1; - heap_array = kj::heapArray(wordsSize + 1); - auto chars_heap_array = heap_array.asChars(); - ::memcpy(chars_heap_array.begin(), edgeBytes, remainingBytes); - ::memcpy(chars_heap_array.begin() + remainingBytes, buf.begin(), buf.size()); - array = heap_array.asPtr(); - } - else - { - auto wordsSize = bytesSize / sizeof(capnp::word); - - array = kj::arrayPtr(base, wordsSize); - auto expected_words = capnp::expectedSizeInWordsFromPrefix(array); - if (expected_words * sizeof(capnp::word) > array.size()) - { - // We'll need to reassemble the message in a contiguous buffer - heap_array = kj::heapArray(expected_words); - istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size()); - array = heap_array.asPtr(); - } - } + auto wordsSize = bytesSize / sizeof(capnp::word) + 1; + heap_array = kj::heapArray(wordsSize + 1); + auto chars_heap_array = heap_array.asChars(); + ::memcpy(chars_heap_array.begin(), edgeBytes, remainingBytes); + ::memcpy(chars_heap_array.begin() + remainingBytes, buf.begin(), buf.size()); + array = heap_array.asPtr(); + } + else + { + auto wordsSize = bytesSize / sizeof(capnp::word); + array = kj::arrayPtr(base, wordsSize); + auto expected_words = capnp::expectedSizeInWordsFromPrefix(array); + if (expected_words * sizeof(capnp::word) > array.size()) + { + // We'll need to reassemble the message in a contiguous buffer + heap_array = 
kj::heapArray(expected_words); + istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size()); + array = heap_array.asPtr(); + } + } #if CAPNP_VERSION >= 8000 capnp::UnalignedFlatArrayMessageReader msg(array); @@ -316,8 +315,8 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) // Advance buffer position if used with remaining bytes from previous buffer else if (remainingBytes != 0) { - auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remainingBytes; - istr.position() += parsed; + auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remainingBytes; + istr.position() += parsed; } return true; From a916b62f33774e1d7ce7ba5a6883895004cc8d92 Mon Sep 17 00:00:00 2001 From: Vladislav Smirnov Date: Wed, 13 Mar 2019 14:18:18 +0300 Subject: [PATCH 03/78] ignore instead append --- dbms/src/Formats/CapnProtoRowInputStream.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index c84442e83f5..5b764c1c93d 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -310,13 +310,13 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) if (heap_array.size() == 0) { auto parsed = (msg.getEnd() - base) * sizeof(capnp::word); - istr.position() += parsed; + istr.ignore(parsed); } // Advance buffer position if used with remaining bytes from previous buffer else if (remainingBytes != 0) { auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remainingBytes; - istr.position() += parsed; + istr.ignore(parsed); } return true; From 80e37baff8ec9009fe5accbf73e7f7263f877664 Mon Sep 17 00:00:00 2001 From: liuyangkuan Date: Wed, 13 Mar 2019 17:33:43 +0800 Subject: [PATCH 04/78] change supportsPrewhere() of table engine Buffer and return its destination_table supportsPrewhere() or not --- 
dbms/src/Storages/StorageBuffer.h | 10 +++++++++- .../0_stateless/00910_buffer_prewhere.reference | 1 + .../queries/0_stateless/00910_buffer_prewhere.sql | 6 ++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference create mode 100644 dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql diff --git a/dbms/src/Storages/StorageBuffer.h b/dbms/src/Storages/StorageBuffer.h index 85ea3f086b5..e2121f7be48 100644 --- a/dbms/src/Storages/StorageBuffer.h +++ b/dbms/src/Storages/StorageBuffer.h @@ -74,7 +74,15 @@ public: void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name) override { name = new_table_name; } bool supportsSampling() const override { return true; } - bool supportsPrewhere() const override { return false; } + bool supportsPrewhere() const override + { + if (no_destination) + return false; + auto dest = global_context.tryGetTable(destination_database, destination_table); + if (dest && dest.get() != this) + return dest->supportsPrewhere(); + return false; + } bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } diff --git a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference new file mode 100644 index 00000000000..56a6051ca2b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql new file mode 100644 index 00000000000..43d7735d832 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql @@ -0,0 +1,6 @@ +DROP DATABASE IF EXISTS test_buffer; +CREATE DATABASE test_buffer; +CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY 
toDate(ts) ORDER BY (uid, ts); +CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); +INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25) +SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00') \ No newline at end of file From 59d473e2151eb5ba61215c68fc22553c2446b621 Mon Sep 17 00:00:00 2001 From: Vladislav Smirnov Date: Wed, 13 Mar 2019 18:28:29 +0300 Subject: [PATCH 05/78] style --- dbms/src/Formats/CapnProtoRowInputStream.cpp | 32 ++++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index 5b764c1c93d..d1c926b122d 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -220,30 +220,30 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) kj::Array heap_array; kj::ArrayPtr array; - size_t bytesSize = buf.size() - istr.offset(); - size_t remainingBytes = 0; - if (bytesSize < sizeof(capnp::word)) // case when we read less than 8 bytes (capnp::word) + size_t bytes_size = buf.size() - istr.offset(); + size_t remaining_bytes = 0; + if (bytes_size < sizeof(capnp::word)) // case when we read less than 8 bytes (capnp::word) { - char edgeBytes[sizeof(capnp::word)]; - while (bytesSize + remainingBytes < sizeof(capnp::word)) + char edge_bytes[sizeof(capnp::word)]; + while (bytes_size + remaining_bytes < sizeof(capnp::word)) { - istr.readStrict(edgeBytes + remainingBytes, bytesSize); - remainingBytes += bytesSize; + istr.readStrict(edge_bytes + remaining_bytes, bytes_size); + remaining_bytes += bytes_size; istr.next(); - bytesSize = buf.size(); + bytes_size = buf.size(); } - auto wordsSize = bytesSize / sizeof(capnp::word) + 1; - heap_array = 
kj::heapArray(wordsSize + 1); + auto words_size = bytes_size / sizeof(capnp::word) + 1; + heap_array = kj::heapArray(words_size + 1); auto chars_heap_array = heap_array.asChars(); - ::memcpy(chars_heap_array.begin(), edgeBytes, remainingBytes); - ::memcpy(chars_heap_array.begin() + remainingBytes, buf.begin(), buf.size()); + ::memcpy(chars_heap_array.begin(), edge_bytes, remaining_bytes); + ::memcpy(chars_heap_array.begin() + remaining_bytes, buf.begin(), buf.size()); array = heap_array.asPtr(); } else { - auto wordsSize = bytesSize / sizeof(capnp::word); - array = kj::arrayPtr(base, wordsSize); + auto words_size = bytes_size / sizeof(capnp::word); + array = kj::arrayPtr(base, words_size); auto expected_words = capnp::expectedSizeInWordsFromPrefix(array); if (expected_words * sizeof(capnp::word) > array.size()) { @@ -313,9 +313,9 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) istr.ignore(parsed); } // Advance buffer position if used with remaining bytes from previous buffer - else if (remainingBytes != 0) + else if (remaining_bytes != 0) { - auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remainingBytes; + auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remaining_bytes; istr.ignore(parsed); } From 53abdccb95b093a3f2f8ef577a50753398a2d074 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 15 Mar 2019 16:49:58 +0300 Subject: [PATCH 06/78] refactoring: extract SelectQueryOptions --- .../ClusterProxy/SelectStreamFactory.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- .../Interpreters/InterpreterExplainQuery.cpp | 3 +- dbms/src/Interpreters/InterpreterFactory.cpp | 4 +- .../Interpreters/InterpreterInsertQuery.cpp | 2 +- .../Interpreters/InterpreterSelectQuery.cpp | 49 ++++++--------- .../src/Interpreters/InterpreterSelectQuery.h | 39 +++--------- .../InterpreterSelectWithUnionQuery.cpp | 27 +++----- .../InterpreterSelectWithUnionQuery.h | 12 ++-- 
.../src/Interpreters/MutationsInterpreter.cpp | 5 +- dbms/src/Interpreters/SelectQueryOptions.h | 62 +++++++++++++++++++ dbms/src/Interpreters/interpretSubquery.cpp | 2 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageDistributed.cpp | 3 +- dbms/src/Storages/StorageMerge.cpp | 6 +- dbms/src/Storages/StorageView.cpp | 2 +- 16 files changed, 121 insertions(+), 101 deletions(-) create mode 100644 dbms/src/Interpreters/SelectQueryOptions.h diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index b34b3d34633..e3337787a39 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -58,7 +58,7 @@ namespace BlockInputStreamPtr createLocalStream(const ASTPtr & query_ast, const Context & context, QueryProcessingStage::Enum processed_stage) { - InterpreterSelectQuery interpreter{query_ast, context, Names{}, processed_stage}; + InterpreterSelectQuery interpreter{query_ast, context, SelectQueryOptions::run(processed_stage)}; BlockInputStreamPtr stream = interpreter.execute().in; /** Materialization is needed, since from remote servers the constants come materialized. 
diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index b6cbaaf181b..c85f9557f6d 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -78,7 +78,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr ASTPtr subquery_select = subquery.children.at(0); BlockIO res = InterpreterSelectWithUnionQuery( - subquery_select, subquery_context, {}, QueryProcessingStage::Complete, data.subquery_depth + 1).execute(); + subquery_select, subquery_context, SelectQueryOptions::run(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute(); Block block; try diff --git a/dbms/src/Interpreters/InterpreterExplainQuery.cpp b/dbms/src/Interpreters/InterpreterExplainQuery.cpp index be7a592ecb9..cfdb4b0fe0c 100644 --- a/dbms/src/Interpreters/InterpreterExplainQuery.cpp +++ b/dbms/src/Interpreters/InterpreterExplainQuery.cpp @@ -51,7 +51,8 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() } else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax) { - InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true); + InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, + SelectQueryOptions::analyzeModify(QueryProcessingStage::FetchColumns)); interpreter.getQuery()->format(IAST::FormatSettings(ss, false)); } diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 5d1b259cc0d..05e3aa001fd 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -84,12 +84,12 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { /// This is internal part of ASTSelectWithUnionQuery. 
/// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child. - return std::make_unique(query, context, Names{}, stage); + return std::make_unique(query, context, SelectQueryOptions::run(stage)); } else if (typeid_cast(query.get())) { ProfileEvents::increment(ProfileEvents::SelectQuery); - return std::make_unique(query, context, Names{}, stage); + return std::make_unique(query, context, SelectQueryOptions::run(stage)); } else if (typeid_cast(query.get())) { diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index d5c2600eda4..93506f86bbe 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -128,7 +128,7 @@ BlockIO InterpreterInsertQuery::execute() if (query.select) { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - InterpreterSelectWithUnionQuery interpreter_select{query.select, context, {}, QueryProcessingStage::Complete, 1}; + InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions::run(QueryProcessingStage::Complete, 1)}; res.in = interpreter_select.execute().in; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 83d53ca37b8..41079fc4479 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -79,13 +79,9 @@ namespace ErrorCodes InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_result_column_names, - QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, - bool only_analyze_, - bool modify_inplace) - : InterpreterSelectQuery( - query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace) + const SelectQueryOptions & 
options, + const Names & required_result_column_names) + : InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, options, required_result_column_names) { } @@ -93,10 +89,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, const BlockInputStreamPtr & input_, - QueryProcessingStage::Enum to_stage_, - bool only_analyze_, - bool modify_inplace) - : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace) + const SelectQueryOptions & options) + : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, options.checkZeroSubquery()) { } @@ -104,10 +98,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, const StoragePtr & storage_, - QueryProcessingStage::Enum to_stage_, - bool only_analyze_, - bool modify_inplace) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace) + const SelectQueryOptions & options) + : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, options.checkZeroSubquery()) { } @@ -134,17 +126,12 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const BlockInputStreamPtr & input_, const StoragePtr & storage_, - const Names & required_result_column_names, - QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, - bool only_analyze_, - bool modify_inplace) + const SelectQueryOptions & options, + const Names & required_result_column_names) + : SelectQueryOptions(options) /// NOTE: the query almost always should be cloned because it will be modified during analysis. - : query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone()) + , query_ptr(modify_inplace ? 
query_ptr_ : query_ptr_->clone()) , context(context_) - , to_stage(to_stage_) - , subquery_depth(subquery_depth_) - , only_analyze(only_analyze_) , storage(storage_) , input(input_) , log(&Logger::get("InterpreterSelectQuery")) @@ -190,7 +177,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Read from subquery. interpreter_subquery = std::make_unique( - table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace); + table_expression, getSubqueryContext(context), subqueryOptions(QueryProcessingStage::Complete), required_columns); source_header = interpreter_subquery->getSampleBlock(); } @@ -248,11 +235,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( interpreter_subquery = std::make_unique( table_expression, getSubqueryContext(context), - required_columns, - QueryProcessingStage::Complete, - subquery_depth + 1, - only_analyze, - modify_inplace); + subqueryOptions(QueryProcessingStage::Complete), + required_columns); } } @@ -1001,8 +985,11 @@ void InterpreterSelectQuery::executeFetchColumns( if (!subquery) throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); + SelectQueryOptions opts = subqueryOptions(QueryProcessingStage::Complete); + opts.modify_inplace = false; + interpreter_subquery = std::make_unique( - subquery, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze); + subquery, getSubqueryContext(context), opts, required_columns); if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index 89fdc35eb7b..db16bd0b134 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -23,23 +24,16 @@ class InterpreterSelectWithUnionQuery; struct 
SyntaxAnalyzerResult; using SyntaxAnalyzerResultPtr = std::shared_ptr; + /** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage. */ -class InterpreterSelectQuery : public IInterpreter +class InterpreterSelectQuery : public IInterpreter, private SelectQueryOptions { public: /** * query_ptr * - A query AST to interpret. * - * to_stage - * - the stage to which the query is to be executed. By default - till to the end. - * You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing. - * - * subquery_depth - * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; - * for INSERT SELECT, a value 1 is passed instead of 0. - * * required_result_column_names * - don't calculate all columns except the specified ones from the query * - it is used to remove calculation (and reading) of unnecessary columns from subqueries. @@ -49,29 +43,22 @@ public: InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_result_column_names = Names{}, - QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - size_t subquery_depth_ = 0, - bool only_analyze_ = false, - bool modify_inplace = false); + const SelectQueryOptions &, + const Names & required_result_column_names = Names{}); /// Read data not from the table specified in the query, but from the prepared source `input`. InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, const BlockInputStreamPtr & input_, - QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - bool only_analyze_ = false, - bool modify_inplace = false); + const SelectQueryOptions & = {}); /// Read data not from the table specified in the query, but from the specified `storage_`. 
InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, const StoragePtr & storage_, - QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - bool only_analyze_ = false, - bool modify_inplace = false); + const SelectQueryOptions & = {}); ~InterpreterSelectQuery() override; @@ -93,11 +80,8 @@ private: const Context & context_, const BlockInputStreamPtr & input_, const StoragePtr & storage_, - const Names & required_result_column_names, - QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, - bool only_analyze_, - bool modify_inplace); + const SelectQueryOptions &, + const Names & required_result_column_names = {}); struct Pipeline @@ -224,8 +208,6 @@ private: ASTPtr query_ptr; Context context; - QueryProcessingStage::Enum to_stage; - size_t subquery_depth = 0; NamesAndTypesList source_columns; SyntaxAnalyzerResultPtr syntax_analyzer_result; std::unique_ptr query_analyzer; @@ -233,9 +215,6 @@ private: /// How many streams we ask for storage to produce, and in how many threads we will do further processing. size_t max_streams = 1; - /// The object was created only for query analysis. - bool only_analyze = false; - /// List of columns to read to execute the query. Names required_columns; /// Structure of query source (table, subquery, etc). 
diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 1dc5419223e..a17a6958c44 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -26,15 +26,11 @@ namespace ErrorCodes InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_result_column_names, - QueryProcessingStage::Enum to_stage_, - size_t subquery_depth_, - bool only_analyze, - bool modify_inplace) - : query_ptr(query_ptr_), - context(context_), - to_stage(to_stage_), - subquery_depth(subquery_depth_) + const SelectQueryOptions & options, + const Names & required_result_column_names) + : SelectQueryOptions(options), + query_ptr(query_ptr_), + context(context_) { const ASTSelectWithUnionQuery & ast = typeid_cast(*query_ptr); @@ -57,7 +53,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( /// We use it to determine positions of 'required_result_column_names' in SELECT clause. 
Block full_result_header = InterpreterSelectQuery( - ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth, true).getSampleBlock(); + ast.list_of_selects->children.at(0), context, SelectQueryOptions::analyze(to_stage, subquery_depth)).getSampleBlock(); std::vector positions_of_required_result_columns(required_result_column_names.size()); for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) @@ -66,7 +62,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (size_t query_num = 1; query_num < num_selects; ++query_num) { Block full_result_header_for_current_select = InterpreterSelectQuery( - ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock(); + ast.list_of_selects->children.at(query_num), context, SelectQueryOptions::analyze(to_stage, subquery_depth)).getSampleBlock(); if (full_result_header_for_current_select.columns() != full_result_header.columns()) throw Exception("Different number of columns in UNION ALL elements:\n" @@ -89,11 +85,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( nested_interpreters.emplace_back(std::make_unique( ast.list_of_selects->children.at(query_num), context, - current_required_result_column_names, - to_stage, - subquery_depth, - only_analyze, - modify_inplace)); + queryOptions(), + current_required_result_column_names)); } /// Determine structure of the result. 
@@ -179,7 +172,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock( return cache[key]; } - return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, {}, QueryProcessingStage::Complete, 0, true).getSampleBlock(); + return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, SelectQueryOptions::analyze()).getSampleBlock(); } diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index 44131a9d100..a913c6dfc59 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -13,17 +14,14 @@ class InterpreterSelectQuery; /** Interprets one or multiple SELECT queries inside UNION ALL chain. */ -class InterpreterSelectWithUnionQuery : public IInterpreter +class InterpreterSelectWithUnionQuery : public IInterpreter, private SelectQueryOptions { public: InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, - const Names & required_result_column_names = Names{}, - QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete, - size_t subquery_depth_ = 0, - bool only_analyze = false, - bool modify_inplace = false); + const SelectQueryOptions &, + const Names & required_result_column_names = {}); ~InterpreterSelectWithUnionQuery() override; @@ -45,8 +43,6 @@ public: private: ASTPtr query_ptr; Context context; - QueryProcessingStage::Enum to_stage; - size_t subquery_depth; std::vector> nested_interpreters; diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index fc3ebe2f76e..6a0c58f5f45 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -72,7 +72,7 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const context_copy.getSettingsRef().merge_tree_uniform_read_distribution = 
0; context_copy.getSettingsRef().max_threads = 1; - BlockInputStreamPtr in = InterpreterSelectQuery(select, context_copy, storage, QueryProcessingStage::Complete).execute().in; + BlockInputStreamPtr in = InterpreterSelectQuery(select, context_copy, storage).execute().in; Block block = in->read(); if (!block.rows()) @@ -366,7 +366,8 @@ void MutationsInterpreter::prepare(bool dry_run) select->children.push_back(where_expression); } - interpreter_select = std::make_unique(select, context, storage, QueryProcessingStage::Complete, dry_run); + interpreter_select = std::make_unique(select, context, storage, + dry_run ? SelectQueryOptions::analyze() : SelectQueryOptions::run()); is_prepared = true; } diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h new file mode 100644 index 00000000000..ed9f171082f --- /dev/null +++ b/dbms/src/Interpreters/SelectQueryOptions.h @@ -0,0 +1,62 @@ +#pragma once + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +/** + * to_stage + * - the stage to which the query is to be executed. By default - till to the end. + * You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing. + * + * subquery_depth + * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed; + * for INSERT SELECT, a value 1 is passed instead of 0. + * + * only_analyze + * - the object was created only for query analysis. 
+ */ +struct SelectQueryOptions +{ + QueryProcessingStage::Enum to_stage = QueryProcessingStage::Complete; + size_t subquery_depth = 0; + bool only_analyze = false; + bool modify_inplace = false; + + static SelectQueryOptions run(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) + { + return {stage, depth, false, false}; + } + + static SelectQueryOptions analyze(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) + { + return {stage, depth, true, false}; + } + + static SelectQueryOptions analyzeModify(QueryProcessingStage::Enum stage, size_t depth = 0) + { + return {stage, depth, true, true}; + } + + const SelectQueryOptions & queryOptions() const { return *this; } + + SelectQueryOptions subqueryOptions(QueryProcessingStage::Enum stage) const + { + return SelectQueryOptions{stage, subquery_depth + 1, only_analyze, modify_inplace}; + } + + const SelectQueryOptions & checkZeroSubquery() const + { + if (subquery_depth) + throw Exception("Logical error: zero subquery depth expected", ErrorCodes::LOGICAL_ERROR); + return *this; + } +}; + +} diff --git a/dbms/src/Interpreters/interpretSubquery.cpp b/dbms/src/Interpreters/interpretSubquery.cpp index a585f7edc42..41a4d444223 100644 --- a/dbms/src/Interpreters/interpretSubquery.cpp +++ b/dbms/src/Interpreters/interpretSubquery.cpp @@ -124,7 +124,7 @@ std::shared_ptr interpretSubquery( } return std::make_shared( - query, subquery_context, required_source_columns, QueryProcessingStage::Complete, subquery_depth + 1); + query, subquery_context, SelectQueryOptions::run(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns); } } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 5487db29703..a71581e6f0d 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -221,7 +221,7 @@ BlockInputStreams StorageBuffer::read( */ if (processed_stage > 
QueryProcessingStage::FetchColumns) for (auto & stream : streams_from_buffers) - stream = InterpreterSelectQuery(query_info.query, context, stream, processed_stage).execute().in; + stream = InterpreterSelectQuery(query_info.query, context, stream, SelectQueryOptions::run(processed_stage)).execute().in; streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end()); return streams_from_dst; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 582dd976c0d..fabe263214f 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -286,7 +286,8 @@ BlockInputStreams StorageDistributed::read( const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table, remote_table_function_ptr); - Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, Names{}, processed_stage).getSampleBlock()); + Block header = materializeBlock( + InterpreterSelectQuery(query_info.query, context, SelectQueryOptions::run(processed_stage)).getSampleBlock()); ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr ? 
ClusterProxy::SelectStreamFactory( diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 4521083bc03..2c0b0bc1d5c 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -274,7 +274,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer if (!storage) return BlockInputStreams{ InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared(header), - processed_stage, true).execute().in}; + SelectQueryOptions::analyze(processed_stage)).execute().in}; BlockInputStreams source_streams; @@ -295,7 +295,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer modified_context.getSettingsRef().max_threads = UInt64(streams_num); modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1; - InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, Names{}, processed_stage}; + InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions::run(processed_stage)}; BlockInputStreamPtr interpreter_stream = interpreter.execute().in; /** Materialization is needed, since from distributed storage the constants come materialized. 
@@ -429,7 +429,7 @@ Block StorageMerge::getQueryHeader( case QueryProcessingStage::Complete: return materializeBlock(InterpreterSelectQuery( query_info.query, context, std::make_shared(getSampleBlockForColumns(column_names)), - processed_stage, true).getSampleBlock()); + SelectQueryOptions::analyze(processed_stage)).getSampleBlock()); } throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); } diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp index 97c085d16e3..0f02870a1c3 100644 --- a/dbms/src/Storages/StorageView.cpp +++ b/dbms/src/Storages/StorageView.cpp @@ -62,7 +62,7 @@ BlockInputStreams StorageView::read( current_inner_query = new_inner_query; } - res = InterpreterSelectWithUnionQuery(current_inner_query, context, column_names).executeWithMultipleStreams(); + res = InterpreterSelectWithUnionQuery(current_inner_query, context, {}, column_names).executeWithMultipleStreams(); /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. 
From ffc397493490855ca93749e41d06d7c20bb96864 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 15 Mar 2019 18:57:18 +0300 Subject: [PATCH 07/78] SelectQueryOptions v2 --- .../ClusterProxy/SelectStreamFactory.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- .../Interpreters/InterpreterExplainQuery.cpp | 2 +- dbms/src/Interpreters/InterpreterFactory.cpp | 4 +- .../Interpreters/InterpreterInsertQuery.cpp | 2 +- .../Interpreters/InterpreterSelectQuery.cpp | 15 ++-- .../InterpreterSelectWithUnionQuery.cpp | 6 +- .../src/Interpreters/MutationsInterpreter.cpp | 2 +- dbms/src/Interpreters/SelectQueryOptions.h | 72 +++++++++++-------- dbms/src/Interpreters/interpretSubquery.cpp | 2 +- dbms/src/Storages/StorageBuffer.cpp | 2 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 6 +- 13 files changed, 63 insertions(+), 56 deletions(-) diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index e3337787a39..1083409f604 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -58,7 +58,7 @@ namespace BlockInputStreamPtr createLocalStream(const ASTPtr & query_ast, const Context & context, QueryProcessingStage::Enum processed_stage) { - InterpreterSelectQuery interpreter{query_ast, context, SelectQueryOptions::run(processed_stage)}; + InterpreterSelectQuery interpreter{query_ast, context, SelectQueryOptions(processed_stage)}; BlockInputStreamPtr stream = interpreter.execute().in; /** Materialization is needed, since from remote servers the constants come materialized. 
diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index c85f9557f6d..5a18f7def0f 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -78,7 +78,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr ASTPtr subquery_select = subquery.children.at(0); BlockIO res = InterpreterSelectWithUnionQuery( - subquery_select, subquery_context, SelectQueryOptions::run(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute(); + subquery_select, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute(); Block block; try diff --git a/dbms/src/Interpreters/InterpreterExplainQuery.cpp b/dbms/src/Interpreters/InterpreterExplainQuery.cpp index cfdb4b0fe0c..3a85767f80c 100644 --- a/dbms/src/Interpreters/InterpreterExplainQuery.cpp +++ b/dbms/src/Interpreters/InterpreterExplainQuery.cpp @@ -52,7 +52,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax) { InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, - SelectQueryOptions::analyzeModify(QueryProcessingStage::FetchColumns)); + modify(analyze(SelectQueryOptions(QueryProcessingStage::FetchColumns)))); interpreter.getQuery()->format(IAST::FormatSettings(ss, false)); } diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp index 05e3aa001fd..67eac76c790 100644 --- a/dbms/src/Interpreters/InterpreterFactory.cpp +++ b/dbms/src/Interpreters/InterpreterFactory.cpp @@ -84,12 +84,12 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, Context & { /// This is internal part of ASTSelectWithUnionQuery. /// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child. 
- return std::make_unique(query, context, SelectQueryOptions::run(stage)); + return std::make_unique(query, context, SelectQueryOptions(stage)); } else if (typeid_cast(query.get())) { ProfileEvents::increment(ProfileEvents::SelectQuery); - return std::make_unique(query, context, SelectQueryOptions::run(stage)); + return std::make_unique(query, context, SelectQueryOptions(stage)); } else if (typeid_cast(query.get())) { diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 93506f86bbe..c6ccd7adf2d 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -128,7 +128,7 @@ BlockIO InterpreterInsertQuery::execute() if (query.select) { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. - InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions::run(QueryProcessingStage::Complete, 1)}; + InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)}; res.in = interpreter_select.execute().in; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 41079fc4479..675188bf5ab 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -90,18 +90,16 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const BlockInputStreamPtr & input_, const SelectQueryOptions & options) - : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, options.checkZeroSubquery()) -{ -} + : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, noSubquery(options)) +{} InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, const StoragePtr & storage_, const SelectQueryOptions & options) - : InterpreterSelectQuery(query_ptr_, context_, 
nullptr, storage_, options.checkZeroSubquery()) -{ -} + : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, noSubquery(options)) +{} InterpreterSelectQuery::~InterpreterSelectQuery() = default; @@ -985,11 +983,8 @@ void InterpreterSelectQuery::executeFetchColumns( if (!subquery) throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); - SelectQueryOptions opts = subqueryOptions(QueryProcessingStage::Complete); - opts.modify_inplace = false; - interpreter_subquery = std::make_unique( - subquery, getSubqueryContext(context), opts, required_columns); + subquery, getSubqueryContext(context), noModify(subqueryOptions(QueryProcessingStage::Complete)), required_columns); if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index a17a6958c44..389b18d3e86 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -53,7 +53,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( /// We use it to determine positions of 'required_result_column_names' in SELECT clause. 
Block full_result_header = InterpreterSelectQuery( - ast.list_of_selects->children.at(0), context, SelectQueryOptions::analyze(to_stage, subquery_depth)).getSampleBlock(); + ast.list_of_selects->children.at(0), context, analyze(noModify(queryOptions()))).getSampleBlock(); std::vector positions_of_required_result_columns(required_result_column_names.size()); for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) @@ -62,7 +62,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (size_t query_num = 1; query_num < num_selects; ++query_num) { Block full_result_header_for_current_select = InterpreterSelectQuery( - ast.list_of_selects->children.at(query_num), context, SelectQueryOptions::analyze(to_stage, subquery_depth)).getSampleBlock(); + ast.list_of_selects->children.at(query_num), context, analyze(noModify(queryOptions()))).getSampleBlock(); if (full_result_header_for_current_select.columns() != full_result_header.columns()) throw Exception("Different number of columns in UNION ALL elements:\n" @@ -172,7 +172,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock( return cache[key]; } - return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, SelectQueryOptions::analyze()).getSampleBlock(); + return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, analyze(SelectQueryOptions())).getSampleBlock(); } diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index 6a0c58f5f45..8445ccf51e4 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -367,7 +367,7 @@ void MutationsInterpreter::prepare(bool dry_run) } interpreter_select = std::make_unique(select, context, storage, - dry_run ? SelectQueryOptions::analyze() : SelectQueryOptions::run()); + dry_run ? 
analyze(SelectQueryOptions()) : SelectQueryOptions()); is_prepared = true; } diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h index ed9f171082f..f0699297128 100644 --- a/dbms/src/Interpreters/SelectQueryOptions.h +++ b/dbms/src/Interpreters/SelectQueryOptions.h @@ -5,11 +5,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - /** * to_stage * - the stage to which the query is to be executed. By default - till to the end. @@ -21,42 +16,59 @@ namespace ErrorCodes * * only_analyze * - the object was created only for query analysis. + * + * is_subquery + * - there could be some specific for subqueries. Ex. there's no need to pass duplicated columns in results, cause of indirect results. */ -struct SelectQueryOptions +class SelectQueryOptions { - QueryProcessingStage::Enum to_stage = QueryProcessingStage::Complete; - size_t subquery_depth = 0; - bool only_analyze = false; - bool modify_inplace = false; - - static SelectQueryOptions run(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) - { - return {stage, depth, false, false}; - } - - static SelectQueryOptions analyze(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) - { - return {stage, depth, true, false}; - } - - static SelectQueryOptions analyzeModify(QueryProcessingStage::Enum stage, size_t depth = 0) - { - return {stage, depth, true, true}; - } +public: + SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) + : to_stage(stage) + , subquery_depth(depth) + , only_analyze(false) + , modify_inplace(false) + {} const SelectQueryOptions & queryOptions() const { return *this; } SelectQueryOptions subqueryOptions(QueryProcessingStage::Enum stage) const { - return SelectQueryOptions{stage, subquery_depth + 1, only_analyze, modify_inplace}; + SelectQueryOptions out = *this; + out.to_stage = stage; + ++out.subquery_depth; + 
return out; } - const SelectQueryOptions & checkZeroSubquery() const + friend SelectQueryOptions analyze(const SelectQueryOptions & src, bool value = true) { - if (subquery_depth) - throw Exception("Logical error: zero subquery depth expected", ErrorCodes::LOGICAL_ERROR); - return *this; + SelectQueryOptions out = src; + out.only_analyze = value; + return out; } + + friend SelectQueryOptions modify(const SelectQueryOptions & src, bool value = true) + { + SelectQueryOptions out = src; + out.modify_inplace = value; + return out; + } + + friend SelectQueryOptions noSubquery(const SelectQueryOptions & src) + { + SelectQueryOptions out = src; + out.subquery_depth = 0; + return out; + } + + friend SelectQueryOptions noModify(const SelectQueryOptions & src) { return modify(src, false); } + friend bool isSubquery(const SelectQueryOptions & opt) { return opt.subquery_depth; } + +protected: + QueryProcessingStage::Enum to_stage; + size_t subquery_depth; + bool only_analyze; + bool modify_inplace; }; } diff --git a/dbms/src/Interpreters/interpretSubquery.cpp b/dbms/src/Interpreters/interpretSubquery.cpp index 41a4d444223..b1eabbda13a 100644 --- a/dbms/src/Interpreters/interpretSubquery.cpp +++ b/dbms/src/Interpreters/interpretSubquery.cpp @@ -124,7 +124,7 @@ std::shared_ptr interpretSubquery( } return std::make_shared( - query, subquery_context, SelectQueryOptions::run(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns); + query, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns); } } diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index a71581e6f0d..22bf58fb828 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -221,7 +221,7 @@ BlockInputStreams StorageBuffer::read( */ if (processed_stage > QueryProcessingStage::FetchColumns) for (auto & stream : streams_from_buffers) - stream = 
InterpreterSelectQuery(query_info.query, context, stream, SelectQueryOptions::run(processed_stage)).execute().in; + stream = InterpreterSelectQuery(query_info.query, context, stream, SelectQueryOptions(processed_stage)).execute().in; streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end()); return streams_from_dst; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index fabe263214f..8ee3e7bfcb9 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -287,7 +287,7 @@ BlockInputStreams StorageDistributed::read( query_info.query, remote_database, remote_table, remote_table_function_ptr); Block header = materializeBlock( - InterpreterSelectQuery(query_info.query, context, SelectQueryOptions::run(processed_stage)).getSampleBlock()); + InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage)).getSampleBlock()); ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr ? 
ClusterProxy::SelectStreamFactory( diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 2c0b0bc1d5c..f344214fde8 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -274,7 +274,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer if (!storage) return BlockInputStreams{ InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared(header), - SelectQueryOptions::analyze(processed_stage)).execute().in}; + analyze(SelectQueryOptions(processed_stage))).execute().in}; BlockInputStreams source_streams; @@ -295,7 +295,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer modified_context.getSettingsRef().max_threads = UInt64(streams_num); modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1; - InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions::run(processed_stage)}; + InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage)}; BlockInputStreamPtr interpreter_stream = interpreter.execute().in; /** Materialization is needed, since from distributed storage the constants come materialized. 
@@ -429,7 +429,7 @@ Block StorageMerge::getQueryHeader( case QueryProcessingStage::Complete: return materializeBlock(InterpreterSelectQuery( query_info.query, context, std::make_shared(getSampleBlockForColumns(column_names)), - SelectQueryOptions::analyze(processed_stage)).getSampleBlock()); + analyze(SelectQueryOptions(processed_stage))).getSampleBlock()); } throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); } From bd559f8db879da357af483747fcd430e00c7f218 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 18 Mar 2019 15:05:51 +0300 Subject: [PATCH 08/78] SelectQueryOptions v3 (no inheritance) --- .../Interpreters/InterpreterExplainQuery.cpp | 2 +- .../Interpreters/InterpreterSelectQuery.cpp | 50 ++++++++++--------- .../src/Interpreters/InterpreterSelectQuery.h | 5 +- .../InterpreterSelectWithUnionQuery.cpp | 12 ++--- .../InterpreterSelectWithUnionQuery.h | 3 +- .../src/Interpreters/MutationsInterpreter.cpp | 3 +- dbms/src/Interpreters/SelectQueryOptions.h | 46 ++++++++--------- dbms/src/Storages/StorageMerge.cpp | 4 +- 8 files changed, 61 insertions(+), 64 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterExplainQuery.cpp b/dbms/src/Interpreters/InterpreterExplainQuery.cpp index db88f49a242..3994ccb5cc7 100644 --- a/dbms/src/Interpreters/InterpreterExplainQuery.cpp +++ b/dbms/src/Interpreters/InterpreterExplainQuery.cpp @@ -52,7 +52,7 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl() else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax) { InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, - modify(analyze(SelectQueryOptions(QueryProcessingStage::FetchColumns)))); + SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify()); interpreter.getQuery()->format(IAST::FormatSettings(ss, false)); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 51c979dd864..fc3a9dbc815 100644 --- 
a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -89,7 +89,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const BlockInputStreamPtr & input_, const SelectQueryOptions & options) - : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, noSubquery(options)) + : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, options.copy().noSubquery()) {} InterpreterSelectQuery::InterpreterSelectQuery( @@ -97,7 +97,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const StoragePtr & storage_, const SelectQueryOptions & options) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, noSubquery(options)) + : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, options.copy().noSubquery()) {} InterpreterSelectQuery::~InterpreterSelectQuery() = default; @@ -123,11 +123,11 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Context & context_, const BlockInputStreamPtr & input_, const StoragePtr & storage_, - const SelectQueryOptions & options, + const SelectQueryOptions & options_, const Names & required_result_column_names) - : SelectQueryOptions(options) + : options(options_) /// NOTE: the query almost always should be cloned because it will be modified during analysis. - , query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone()) + , query_ptr(options.modify_inplace ? query_ptr_ : query_ptr_->clone()) , context(context_) , storage(storage_) , input(input_) @@ -136,7 +136,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( initSettings(); const Settings & settings = context.getSettingsRef(); - if (settings.max_subquery_depth && subquery_depth > settings.max_subquery_depth) + if (settings.max_subquery_depth && options.subquery_depth > settings.max_subquery_depth) throw Exception("Too deep subqueries. 
Maximum: " + settings.max_subquery_depth.toString(), ErrorCodes::TOO_DEEP_SUBQUERIES); @@ -174,7 +174,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Read from subquery. interpreter_subquery = std::make_unique( - table_expression, getSubqueryContext(context), subqueryOptions(QueryProcessingStage::Complete), required_columns); + table_expression, getSubqueryContext(context), options.subquery(), required_columns); source_header = interpreter_subquery->getSampleBlock(); } @@ -200,13 +200,14 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId()); - syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( + syntax_analyzer_result = SyntaxAnalyzer(context, options.subquery_depth).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), - NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze); + NameSet(required_result_column_names.begin(), required_result_column_names.end()), + options.subquery_depth, !options.only_analyze); - if (!only_analyze) + if (!options.only_analyze) { if (query.sample_size() && (input || !storage || !storage->supportsSampling())) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); @@ -223,7 +224,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( context.addExternalTable(it.first, it.second); } - if (!only_analyze || modify_inplace) + if (!options.only_analyze || options.modify_inplace) { if (query_analyzer->isRewriteSubqueriesPredicate()) { @@ -232,7 +233,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( interpreter_subquery = std::make_unique( table_expression, getSubqueryContext(context), - subqueryOptions(QueryProcessingStage::Complete), + options.subquery(), required_columns); 
} } @@ -286,7 +287,7 @@ Block InterpreterSelectQuery::getSampleBlock() BlockIO InterpreterSelectQuery::execute() { Pipeline pipeline; - executeImpl(pipeline, input, only_analyze); + executeImpl(pipeline, input, options.only_analyze); executeUnion(pipeline); BlockIO res; @@ -297,7 +298,7 @@ BlockIO InterpreterSelectQuery::execute() BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams() { Pipeline pipeline; - executeImpl(pipeline, input, only_analyze); + executeImpl(pipeline, input, options.only_analyze); return pipeline.streams; } @@ -307,10 +308,10 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. res.first_stage = from_stage < QueryProcessingStage::WithMergeableState - && to_stage >= QueryProcessingStage::WithMergeableState; + && options.to_stage >= QueryProcessingStage::WithMergeableState; /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. res.second_stage = from_stage <= QueryProcessingStage::WithMergeableState - && to_stage > QueryProcessingStage::WithMergeableState; + && options.to_stage > QueryProcessingStage::WithMergeableState; /** First we compose a chain of actions and remember the necessary steps from it. * Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and @@ -535,16 +536,16 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt expressions = analyzeExpressions(from_stage, false); if (from_stage == QueryProcessingStage::WithMergeableState && - to_stage == QueryProcessingStage::WithMergeableState) + options.to_stage == QueryProcessingStage::WithMergeableState) throw Exception("Distributed on Distributed is not supported", ErrorCodes::NOT_IMPLEMENTED); /** Read the data from Storage. 
from_stage - to what stage the request was completed in Storage. */ executeFetchColumns(from_stage, pipeline, expressions.prewhere_info, expressions.columns_to_remove_after_prewhere); - LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage)); + LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(options.to_stage)); } - if (to_stage > QueryProcessingStage::FetchColumns) + if (options.to_stage > QueryProcessingStage::FetchColumns) { /// Do I need to aggregate in a separate row rows that have not passed max_rows_to_group_by. bool aggregate_overflow_row = @@ -557,7 +558,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt /// Do I need to immediately finalize the aggregate functions after the aggregation? bool aggregate_final = expressions.need_aggregate && - to_stage > QueryProcessingStage::WithMergeableState && + options.to_stage > QueryProcessingStage::WithMergeableState && !query.group_by_with_totals && !query.group_by_with_rollup && !query.group_by_with_cube; if (expressions.first_stage) @@ -920,7 +921,7 @@ void InterpreterSelectQuery::executeFetchColumns( /// Limitation on the number of columns to read. /// It's not applied in 'only_analyze' mode, because the query could be analyzed without removal of unnecessary columns. - if (!only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) + if (!options.only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) throw Exception("Limit for number of columns to read exceeded. 
" "Requested: " + toString(required_columns.size()) + ", maximum: " + settings.max_columns_to_read.toString(), @@ -982,7 +983,8 @@ void InterpreterSelectQuery::executeFetchColumns( throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); interpreter_subquery = std::make_unique( - subquery, getSubqueryContext(context), noModify(subqueryOptions(QueryProcessingStage::Complete)), required_columns); + subquery, getSubqueryContext(context), + options.copy().subquery().noModify(), required_columns); if (query_analyzer->hasAggregation()) interpreter_subquery->ignoreWithTotals(); @@ -1039,7 +1041,7 @@ void InterpreterSelectQuery::executeFetchColumns( * additionally on each remote server, because these limits are checked per block of data processed, * and remote servers may process way more blocks of data than are received by initiator. */ - if (to_stage == QueryProcessingStage::Complete) + if (options.to_stage == QueryProcessingStage::Complete) { limits.min_execution_speed = settings.min_execution_speed; limits.max_execution_speed = settings.max_execution_speed; @@ -1054,7 +1056,7 @@ void InterpreterSelectQuery::executeFetchColumns( { stream->setLimits(limits); - if (to_stage == QueryProcessingStage::Complete) + if (options.to_stage == QueryProcessingStage::Complete) stream->setQuota(quota); }); } diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h index c3cbe00f169..4d8c4a7a39b 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectQuery.h @@ -7,8 +7,8 @@ #include #include #include -#include #include +#include #include #include @@ -27,7 +27,7 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr; /** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage. 
*/ -class InterpreterSelectQuery : public IInterpreter, private SelectQueryOptions +class InterpreterSelectQuery : public IInterpreter { public: /** @@ -207,6 +207,7 @@ private: */ void initSettings(); + const SelectQueryOptions options; ASTPtr query_ptr; Context context; NamesAndTypesList source_columns; diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index efa63043a9b..4561affbd10 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -26,9 +26,9 @@ namespace ErrorCodes InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( const ASTPtr & query_ptr_, const Context & context_, - const SelectQueryOptions & options, + const SelectQueryOptions & options_, const Names & required_result_column_names) - : SelectQueryOptions(options), + : options(options_), query_ptr(query_ptr_), context(context_) { @@ -53,7 +53,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( /// We use it to determine positions of 'required_result_column_names' in SELECT clause. 
Block full_result_header = InterpreterSelectQuery( - ast.list_of_selects->children.at(0), context, analyze(noModify(queryOptions()))).getSampleBlock(); + ast.list_of_selects->children.at(0), context, options.copy().analyze().noModify()).getSampleBlock(); std::vector positions_of_required_result_columns(required_result_column_names.size()); for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num) @@ -62,7 +62,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (size_t query_num = 1; query_num < num_selects; ++query_num) { Block full_result_header_for_current_select = InterpreterSelectQuery( - ast.list_of_selects->children.at(query_num), context, analyze(noModify(queryOptions()))).getSampleBlock(); + ast.list_of_selects->children.at(query_num), context, options.copy().analyze().noModify()).getSampleBlock(); if (full_result_header_for_current_select.columns() != full_result_header.columns()) throw Exception("Different number of columns in UNION ALL elements:\n" @@ -85,7 +85,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( nested_interpreters.emplace_back(std::make_unique( ast.list_of_selects->children.at(query_num), context, - queryOptions(), + options, current_required_result_column_names)); } @@ -172,7 +172,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock( return cache[key]; } - return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, analyze(SelectQueryOptions())).getSampleBlock(); + return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, SelectQueryOptions().analyze()).getSampleBlock(); } diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h index a913c6dfc59..84d562a5308 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -14,7 +14,7 @@ class 
InterpreterSelectQuery; /** Interprets one or multiple SELECT queries inside UNION ALL chain. */ -class InterpreterSelectWithUnionQuery : public IInterpreter, private SelectQueryOptions +class InterpreterSelectWithUnionQuery : public IInterpreter { public: InterpreterSelectWithUnionQuery( @@ -41,6 +41,7 @@ public: ASTPtr getQuery() const { return query_ptr; } private: + const SelectQueryOptions options; ASTPtr query_ptr; Context context; diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index b1fcd7283f9..6415799af10 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -367,8 +367,7 @@ void MutationsInterpreter::prepare(bool dry_run) select->children.push_back(where_expression); } - interpreter_select = std::make_unique(select, context, storage, - dry_run ? analyze(SelectQueryOptions()) : SelectQueryOptions()); + interpreter_select = std::make_unique(select, context, storage, SelectQueryOptions().analyze(dry_run)); is_prepared = true; } diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h index f0699297128..56b4a8012b9 100644 --- a/dbms/src/Interpreters/SelectQueryOptions.h +++ b/dbms/src/Interpreters/SelectQueryOptions.h @@ -20,9 +20,13 @@ namespace DB * is_subquery * - there could be some specific for subqueries. Ex. there's no need to pass duplicated columns in results, cause of indirect results. 
*/ -class SelectQueryOptions +struct SelectQueryOptions { -public: + QueryProcessingStage::Enum to_stage; + size_t subquery_depth; + bool only_analyze; + bool modify_inplace; + SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) : to_stage(stage) , subquery_depth(depth) @@ -30,45 +34,35 @@ public: , modify_inplace(false) {} - const SelectQueryOptions & queryOptions() const { return *this; } + SelectQueryOptions copy() const { return *this; } - SelectQueryOptions subqueryOptions(QueryProcessingStage::Enum stage) const + SelectQueryOptions subquery() const { SelectQueryOptions out = *this; - out.to_stage = stage; + out.to_stage = QueryProcessingStage::Complete; ++out.subquery_depth; return out; } - friend SelectQueryOptions analyze(const SelectQueryOptions & src, bool value = true) + SelectQueryOptions & analyze(bool value = true) { - SelectQueryOptions out = src; - out.only_analyze = value; - return out; + only_analyze = value; + return *this; } - friend SelectQueryOptions modify(const SelectQueryOptions & src, bool value = true) + SelectQueryOptions & modify(bool value = true) { - SelectQueryOptions out = src; - out.modify_inplace = value; - return out; + modify_inplace = value; + return *this; } - friend SelectQueryOptions noSubquery(const SelectQueryOptions & src) + SelectQueryOptions & noModify() { return modify(false); } + + SelectQueryOptions & noSubquery() { - SelectQueryOptions out = src; - out.subquery_depth = 0; - return out; + subquery_depth = 0; + return *this; } - - friend SelectQueryOptions noModify(const SelectQueryOptions & src) { return modify(src, false); } - friend bool isSubquery(const SelectQueryOptions & opt) { return opt.subquery_depth; } - -protected: - QueryProcessingStage::Enum to_stage; - size_t subquery_depth; - bool only_analyze; - bool modify_inplace; }; } diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index d938ba54292..40ce12abf54 100644 --- 
a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -274,7 +274,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer if (!storage) return BlockInputStreams{ InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared(header), - analyze(SelectQueryOptions(processed_stage))).execute().in}; + SelectQueryOptions(processed_stage).analyze()).execute().in}; BlockInputStreams source_streams; @@ -429,7 +429,7 @@ Block StorageMerge::getQueryHeader( case QueryProcessingStage::Complete: return materializeBlock(InterpreterSelectQuery( query_info.query, context, std::make_shared(getSampleBlockForColumns(column_names)), - analyze(SelectQueryOptions(processed_stage))).getSampleBlock()); + SelectQueryOptions(processed_stage).analyze()).getSampleBlock()); } throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); } From 7561ff2ab7e5eb86ab488caa09d2cdcb5f0b3681 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 18 Mar 2019 17:56:33 +0300 Subject: [PATCH 09/78] fix dups in GLOBAL JOIN with asterisks --- .../Interpreters/InterpreterSelectQuery.cpp | 2 +- dbms/src/Interpreters/SelectQueryOptions.h | 8 ++ dbms/src/Interpreters/SyntaxAnalyzer.cpp | 36 +++++--- dbms/src/Interpreters/SyntaxAnalyzer.h | 7 +- dbms/src/Interpreters/interpretSubquery.cpp | 44 +-------- .../00850_global_join_dups.reference | 5 + .../0_stateless/00850_global_join_dups.sql | 92 +++++++++---------- 7 files changed, 94 insertions(+), 100 deletions(-) diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index fc3a9dbc815..182927b7104 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -200,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId()); - syntax_analyzer_result = 
SyntaxAnalyzer(context, options.subquery_depth).analyze( + syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze( query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h index 56b4a8012b9..0cf5827be3c 100644 --- a/dbms/src/Interpreters/SelectQueryOptions.h +++ b/dbms/src/Interpreters/SelectQueryOptions.h @@ -26,12 +26,14 @@ struct SelectQueryOptions size_t subquery_depth; bool only_analyze; bool modify_inplace; + bool remove_duplicates; SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0) : to_stage(stage) , subquery_depth(depth) , only_analyze(false) , modify_inplace(false) + , remove_duplicates(false) {} SelectQueryOptions copy() const { return *this; } @@ -58,6 +60,12 @@ struct SelectQueryOptions SelectQueryOptions & noModify() { return modify(false); } + SelectQueryOptions & removeDuplicates(bool value = true) + { + remove_duplicates = value; + return *this; + } + SelectQueryOptions & noSubquery() { subquery_depth = 0; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 34bec70223f..2ab21196f46 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -125,22 +125,36 @@ bool hasArrayJoin(const ASTPtr & ast) /// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. -void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns) +/// Also we have to remove duplicates in case of GLOBAL subqueries. 
Their results are placed into tables so duplicates are inpossible. +void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups) { - if (required_result_columns.empty()) - return; - ASTs & elements = select_query->select_expression_list->children; + std::map required_columns_with_duplicate_count; + + if (!required_result_columns.empty()) + { + /// Some columns may be queried multiple times, like SELECT x, y, y FROM table. + for (const auto & name : required_result_columns) + { + if (remove_dups) + required_columns_with_duplicate_count[name] = 1; + else + ++required_columns_with_duplicate_count[name]; + } + } + else if (remove_dups) + { + /// Even if we have no requirements there could be duplicates cause of asterisks. SELECT *, t.* + for (const auto & elem : elements) + required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1); + } + else + return; + ASTs new_elements; new_elements.reserve(elements.size()); - /// Some columns may be queried multiple times, like SELECT x, y, y FROM table. - /// In that case we keep them exactly same number of times. - std::map required_columns_with_duplicate_count; - for (const auto & name : required_result_columns) - ++required_columns_with_duplicate_count[name]; - for (const auto & elem : elements) { String name = elem->getAliasOrColumnName(); @@ -688,7 +702,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. if (select_query) - removeUnneededColumnsFromSelectClause(select_query, required_result_columns); + removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates); /// Executing scalar subqueries - replacing them with constant values. 
executeScalarSubqueries(query, context, subquery_depth); diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 4450881ee68..90ab01c2397 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { @@ -55,9 +56,10 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr; class SyntaxAnalyzer { public: - SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0) + SyntaxAnalyzer(const Context & context_, const SelectQueryOptions & select_options = {}) : context(context_) - , subquery_depth(subquery_depth_) + , subquery_depth(select_options.subquery_depth) + , remove_duplicates(select_options.remove_duplicates) {} SyntaxAnalyzerResultPtr analyze( @@ -69,6 +71,7 @@ public: private: const Context & context; size_t subquery_depth; + bool remove_duplicates; }; } diff --git a/dbms/src/Interpreters/interpretSubquery.cpp b/dbms/src/Interpreters/interpretSubquery.cpp index 11681dd9ce8..93f9c728712 100644 --- a/dbms/src/Interpreters/interpretSubquery.cpp +++ b/dbms/src/Interpreters/interpretSubquery.cpp @@ -41,6 +41,8 @@ std::shared_ptr interpretSubquery( subquery_settings.extremes = 0; subquery_context.setSettings(subquery_settings); + auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery(); + ASTPtr query; if (table || function) { @@ -83,48 +85,10 @@ std::shared_ptr interpretSubquery( else { query = subquery->children.at(0); - - /** Columns with the same name can be specified in a subquery. For example, SELECT x, x FROM t - * This is bad, because the result of such a query can not be saved to the table, because the table can not have the same name columns. - * Saving to the table is required for GLOBAL subqueries. - * - * To avoid this situation, we will rename the same columns. 
- */ - - std::set all_column_names; - std::set assigned_column_names; - - if (const auto * select_with_union = query->as()) - { - if (const auto * select = select_with_union->list_of_selects->children.at(0)->as()) - { - for (auto & expr : select->select_expression_list->children) - all_column_names.insert(expr->getAliasOrColumnName()); - - for (auto & expr : select->select_expression_list->children) - { - auto name = expr->getAliasOrColumnName(); - - if (!assigned_column_names.insert(name).second) - { - size_t i = 1; - while (all_column_names.end() != all_column_names.find(name + "_" + toString(i))) - ++i; - - name = name + "_" + toString(i); - expr = expr->clone(); /// Cancels fuse of the same expressions in the tree. - expr->setAlias(name); - - all_column_names.insert(name); - assigned_column_names.insert(name); - } - } - } - } + subquery_options.removeDuplicates(); } - return std::make_shared( - query, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth + 1), required_source_columns); + return std::make_shared(query, subquery_context, subquery_options, required_source_columns); } } diff --git a/dbms/tests/queries/0_stateless/00850_global_join_dups.reference b/dbms/tests/queries/0_stateless/00850_global_join_dups.reference index b261da18d51..dcaae3f745e 100644 --- a/dbms/tests/queries/0_stateless/00850_global_join_dups.reference +++ b/dbms/tests/queries/0_stateless/00850_global_join_dups.reference @@ -1,2 +1,7 @@ 1 0 +0 +0 0 +0 +0 0 +0 0 diff --git a/dbms/tests/queries/0_stateless/00850_global_join_dups.sql b/dbms/tests/queries/0_stateless/00850_global_join_dups.sql index faf0397374a..d9b749abd5d 100644 --- a/dbms/tests/queries/0_stateless/00850_global_join_dups.sql +++ b/dbms/tests/queries/0_stateless/00850_global_join_dups.sql @@ -35,49 +35,49 @@ GLOBAL INNER JOIN ) USING dummy; --- SET asterisk_left_columns_only = 0; --- --- SELECT * FROM remote('127.0.0.2', system.one) --- GLOBAL INNER JOIN --- ( --- SELECT *, dummy --- FROM ( 
SELECT dummy FROM remote('127.0.0.2', system.one) ) t1 --- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2 --- USING dummy --- ) USING dummy; --- --- SELECT * FROM remote('127.0.0.2', system.one) --- GLOBAL INNER JOIN --- ( --- SELECT *, t1.*, t2.* --- FROM ( SELECT toUInt8(1) AS dummy ) t1 --- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2 --- USING dummy --- ) USING dummy; --- --- SELECT * FROM remote('127.0.0.2', system.one) --- GLOBAL INNER JOIN --- ( --- SELECT *, dummy --- FROM ( SELECT toUInt8(1) AS dummy ) t1 --- INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2 --- USING dummy --- ) USING dummy; --- --- SELECT * FROM remote('127.0.0.2', system.one) --- GLOBAL INNER JOIN --- ( --- SELECT * --- FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1 --- GLOBAL INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2 --- USING dummy --- ) USING dummy; --- --- SELECT * FROM remote('127.0.0.2', system.one) --- GLOBAL INNER JOIN --- ( --- SELECT * --- FROM ( SELECT toUInt8(1) AS dummy ) t1 --- GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2 --- USING dummy --- ) USING dummy; +SET asterisk_left_columns_only = 0; + +SELECT * FROM remote('127.0.0.2', system.one) +GLOBAL INNER JOIN +( + SELECT *, dummy + FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1 + GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2 + USING dummy +) USING dummy; + +SELECT * FROM remote('127.0.0.2', system.one) +GLOBAL INNER JOIN +( + SELECT *, t1.*, t2.* + FROM ( SELECT toUInt8(0) AS dummy ) t1 + INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2 + USING dummy +) USING dummy; + +SELECT * FROM remote('127.0.0.2', system.one) +GLOBAL INNER JOIN +( + SELECT *, dummy + FROM ( SELECT toUInt8(0) AS dummy ) t1 + INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2 + USING dummy +) USING dummy; + +SELECT * FROM remote('127.0.0.2', system.one) +GLOBAL INNER JOIN +( + SELECT *, dummy as other + FROM ( SELECT dummy FROM remote('127.0.0.3', 
system.one) ) t1 + GLOBAL INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2 + USING dummy +) USING dummy; + +SELECT * FROM remote('127.0.0.2', system.one) +GLOBAL INNER JOIN +( + SELECT *, dummy, dummy as other + FROM ( SELECT toUInt8(0) AS dummy ) t1 + GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2 + USING dummy +) USING dummy; From 913377fdafc8af83ab4bfef8252ec0d58d1305b9 Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 18 Mar 2019 21:45:49 +0300 Subject: [PATCH 10/78] restore GLOBAL IN column renames --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 34 ++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 2ab21196f46..08dbfc1ee8c 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -123,6 +123,37 @@ bool hasArrayJoin(const ASTPtr & ast) return false; } +/// Keep number of columns for 'GLOBAL IN (SELECT 1 AS a, a)' +void renameDuplicatedColumns(const ASTSelectQuery * select_query) +{ + ASTs & elements = select_query->select_expression_list->children; + + std::set all_column_names; + std::set assigned_column_names; + + for (auto & expr : elements) + all_column_names.insert(expr->getAliasOrColumnName()); + + for (auto & expr : elements) + { + auto name = expr->getAliasOrColumnName(); + + if (!assigned_column_names.insert(name).second) + { + size_t i = 1; + while (all_column_names.end() != all_column_names.find(name + "_" + toString(i))) + ++i; + + name = name + "_" + toString(i); + expr = expr->clone(); /// Cancels fuse of the same expressions in the tree. + expr->setAlias(name); + + all_column_names.insert(name); + assigned_column_names.insert(name); + } + } +} + /// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. 
/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables so duplicates are inpossible. @@ -659,6 +690,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( if (select_query) { + if (remove_duplicates) + renameDuplicatedColumns(select_query); + if (const ASTTablesInSelectQueryElement * node = select_query->join()) { if (settings.enable_optimize_predicate_expression) From e5a9633132f14a1c5a5fb181c0e3bd3d71911109 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 19 Mar 2019 19:53:36 +0300 Subject: [PATCH 11/78] Join.cpp refactoring --- dbms/src/Interpreters/Join.cpp | 250 ++++++++++----------------------- dbms/src/Interpreters/Join.h | 46 ++++++ 2 files changed, 123 insertions(+), 173 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 7ce39e12a00..6079ed5b7fd 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -120,56 +120,6 @@ Join::Type Join::chooseMethod(const ColumnRawPtrs & key_columns, Sizes & key_siz } -template -static void initImpl(Maps & maps, Join::Type type) -{ - switch (type) - { - case Join::Type::EMPTY: break; - case Join::Type::CROSS: break; - - #define M(TYPE) \ - case Join::Type::TYPE: maps.TYPE = std::make_unique(); break; - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } -} - -template -static size_t getTotalRowCountImpl(const Maps & maps, Join::Type type) -{ - switch (type) - { - case Join::Type::EMPTY: return 0; - case Join::Type::CROSS: return 0; - - #define M(NAME) \ - case Join::Type::NAME: return maps.NAME ? maps.NAME->size() : 0; - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); -} - -template -static size_t getTotalByteCountImpl(const Maps & maps, Join::Type type) -{ - switch (type) - { - case Join::Type::EMPTY: return 0; - case Join::Type::CROSS: return 0; - - #define M(NAME) \ - case Join::Type::NAME: return maps.NAME ? 
maps.NAME->getBufferSizeInBytes() : 0; - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - } - - __builtin_unreachable(); -} - - template struct KeyGetterForTypeImpl; @@ -227,7 +177,7 @@ void Join::init(Type type_) if (kind == ASTTableJoin::Kind::Cross) return; dispatch(MapInitTag()); - dispatch([&](auto, auto, auto & map) { initImpl(map, type); }); + dispatch([&](auto, auto, auto & map) { map.create(type); }); } size_t Join::getTotalRowCount() const @@ -241,7 +191,7 @@ size_t Join::getTotalRowCount() const } else { - dispatch([&](auto, auto, auto & map) { res += getTotalRowCountImpl(map, type); }); + dispatch([&](auto, auto, auto & map) { res += map.getTotalRowCount(type); }); } return res; @@ -258,7 +208,7 @@ size_t Join::getTotalByteCount() const } else { - dispatch([&](auto, auto, auto & map) { res += getTotalByteCountImpl(map, type); }); + dispatch([&](auto, auto, auto & map) { res += map.getTotalByteCountImpl(type); }); res += pool.size(); } @@ -526,61 +476,20 @@ bool Join::insertFromBlock(const Block & block) namespace { - template - struct Adder; - - template - struct Adder + template + void addFoundRow(const typename Map::mapped_type & mapped, const std::vector & right_indexes, + MutableColumns & added_columns, IColumn::Offset & current_offset [[maybe_unused]]) { - static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, - const std::vector & right_indexes) - { - filter[i] = 1; + size_t num_columns_to_add = right_indexes.size(); + if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) + { for (size_t j = 0; j < num_columns_to_add; ++j) added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); } - static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter & filter, IColumn::Offset & 
/*current_offset*/, IColumn::Offsets * /*offsets*/) + if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) { - filter[i] = 0; - - for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertDefault(); - } - }; - - template - struct Adder - { - static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/, - const std::vector & right_indexes) - { - filter[i] = 1; - - for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); - } - - static void addNotFound(size_t /*num_columns_to_add*/, MutableColumns & /*added_columns*/, - size_t i, IColumn::Filter & filter, IColumn::Offset & /*current_offset*/, IColumn::Offsets * /*offsets*/) - { - filter[i] = 0; - } - }; - - template - struct Adder - { - static void addFound(const typename Map::mapped_type & mapped, size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets, - const std::vector & right_indexes) - { - filter[i] = 1; - size_t rows_joined = 0; for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { @@ -591,48 +500,42 @@ namespace } current_offset += rows_joined; - (*offsets)[i] = current_offset; - } - - static void addNotFound(size_t num_columns_to_add, MutableColumns & added_columns, - size_t i, IColumn::Filter & filter, IColumn::Offset & current_offset, IColumn::Offsets * offsets) - { - filter[i] = 0; - - if (!fill_left) - { - (*offsets)[i] = current_offset; - } - else - { - ++current_offset; - (*offsets)[i] = current_offset; - - for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertDefault(); - } } }; + template + void addNotFoundRow(const std::vector & right_indexes [[maybe_unused]], 
MutableColumns & added_columns [[maybe_unused]], + IColumn::Offset & current_offset [[maybe_unused]]) + { + if constexpr (fill_left) + { + ++current_offset; + + for (size_t j = 0; j < right_indexes.size(); ++j) + added_columns[j]->insertDefault(); + } + } + template - void NO_INLINE joinBlockImplTypeCase( - const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + std::unique_ptr NO_INLINE joinRightIndexColumns( + const Map & map, size_t rows, KeyGetter & key_getter, MutableColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, - std::unique_ptr & offsets_to_replicate, const std::vector & right_indexes) { - IColumn::Offset current_offset = 0; - size_t num_columns_to_add = right_indexes.size(); + constexpr bool fill_left = Join::KindTrait::fill_left; + std::unique_ptr offsets_to_replicate; + if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) + offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; Arena pool; - KeyGetter key_getter(key_columns, key_sizes, nullptr); for (size_t i = 0; i < rows; ++i) { if (has_null_map && (*null_map)[i]) { - Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); + addNotFoundRow(right_indexes, added_columns, current_offset); } else { @@ -640,43 +543,60 @@ namespace if (find_result.isFound()) { + filter[i] = 1; auto & mapped = find_result.getMapped(); mapped.setUsed(); - Adder::fill_left, STRICTNESS, Map>::addFound( - mapped, num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get(), right_indexes); + addFoundRow(mapped, right_indexes, added_columns, current_offset); } else - Adder::fill_left, STRICTNESS, Map>::addNotFound( - num_columns_to_add, added_columns, i, filter, current_offset, offsets_to_replicate.get()); + addNotFoundRow(right_indexes, added_columns, current_offset); } + + if constexpr (STRICTNESS == 
ASTTableJoin::Strictness::All) + (*offsets_to_replicate)[i] = current_offset; } + + return offsets_to_replicate; } - using BlockFilterData = std::pair< - std::unique_ptr, - std::unique_ptr>; - template - BlockFilterData joinBlockImplType( + IColumn::Filter joinRightIndex( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes) + MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, + std::unique_ptr & offsets_to_replicate) { - std::unique_ptr filter = std::make_unique(rows); - std::unique_ptr offsets_to_replicate; - - if (STRICTNESS == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); + IColumn::Filter filter(rows, 0); + KeyGetter key_getter(key_columns, key_sizes, nullptr); if (null_map) - joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, *filter, - offsets_to_replicate, right_indexes); + offsets_to_replicate = joinRightIndexColumns( + map, rows, key_getter, added_columns, null_map, filter, right_indexes); else - joinBlockImplTypeCase( - map, rows, key_columns, key_sizes, added_columns, null_map, *filter, - offsets_to_replicate, right_indexes); + offsets_to_replicate = joinRightIndexColumns( + map, rows, key_getter, added_columns, null_map, filter, right_indexes); - return {std::move(filter), std::move(offsets_to_replicate)}; + return filter; + } + + template + IColumn::Filter switchJoinRightIndex( + Join::Type type, + const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, + std::unique_ptr & offsets_to_replicate) + { + switch (type) + { + #define M(TYPE) \ + case Join::Type::TYPE: \ + return joinRightIndex>::Type>(\ + *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, right_indexes, 
offsets_to_replicate); + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + + default: + throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); + } } } @@ -762,31 +682,15 @@ void Join::joinBlockImpl( } } - std::unique_ptr filter; std::unique_ptr offsets_to_replicate; - switch (type) - { - #define M(TYPE) \ - case Join::Type::TYPE: \ - std::tie(filter, offsets_to_replicate) = \ - joinBlockImplType>::Type>(\ - *maps_.TYPE, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes); \ - break; - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - - default: - throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); - } + IColumn::Filter filter = switchJoinRightIndex( + type, maps_, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes, offsets_to_replicate); const auto added_columns_size = added_columns.size(); for (size_t i = 0; i < added_columns_size; ++i) block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second)); - if (!filter) - throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); - NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); if (strictness == ASTTableJoin::Strictness::Any) @@ -795,7 +699,7 @@ void Join::joinBlockImpl( { /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. for (size_t i = 0; i < existing_columns; ++i) - block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(*filter, -1); + block.safeGetByPosition(i).column = block.safeGetByPosition(i).column->filter(filter, -1); /// Add join key columns from right block if they has different name. 
for (size_t i = 0; i < key_names_right.size(); ++i) @@ -824,9 +728,9 @@ void Join::joinBlockImpl( auto & column = col.column; MutableColumnPtr mut_column = column->cloneEmpty(); - for (size_t col_no = 0; col_no < filter->size(); ++col_no) + for (size_t col_no = 0; col_no < filter.size(); ++col_no) { - if ((*filter)[col_no]) + if (filter[col_no]) mut_column->insertFrom(*column, col_no); else mut_column->insertDefault(); @@ -859,7 +763,7 @@ void Join::joinBlockImpl( { if (size_t to_insert = (*offsets_to_replicate)[col_no] - last_offset) { - if (!(*filter)[col_no]) + if (!filter[col_no]) mut_column->insertDefault(); else for (size_t dup = 0; dup < to_insert; ++dup) diff --git a/dbms/src/Interpreters/Join.h b/dbms/src/Interpreters/Join.h index b6ac8dc4fd5..ceaf0f847db 100644 --- a/dbms/src/Interpreters/Join.h +++ b/dbms/src/Interpreters/Join.h @@ -228,6 +228,52 @@ public: std::unique_ptr> keys128; std::unique_ptr> keys256; std::unique_ptr> hashed; + + void create(Type which) + { + switch (which) + { + case Type::EMPTY: break; + case Type::CROSS: break; + + #define M(NAME) \ + case Type::NAME: NAME = std::make_unique(); break; + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + } + } + + size_t getTotalRowCount(Type which) const + { + switch (which) + { + case Type::EMPTY: return 0; + case Type::CROSS: return 0; + + #define M(NAME) \ + case Type::NAME: return NAME ? NAME->size() : 0; + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); + } + + size_t getTotalByteCountImpl(Type which) const + { + switch (which) + { + case Type::EMPTY: return 0; + case Type::CROSS: return 0; + + #define M(NAME) \ + case Type::NAME: return NAME ? 
NAME->getBufferSizeInBytes() : 0; + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + } + + __builtin_unreachable(); + } }; using MapsAny = MapsTemplate>; From fbcf82c4dfb1e4654205edef48267ec43f87ceb3 Mon Sep 17 00:00:00 2001 From: liuyangkuan Date: Thu, 14 Mar 2019 16:05:18 +0800 Subject: [PATCH 12/78] add interpretation of PREWHERE in StorageBuffer::read --- dbms/src/Storages/StorageBuffer.cpp | 16 ++++++++++++++++ .../0_stateless/00910_buffer_prewhere.reference | 2 +- .../0_stateless/00910_buffer_prewhere.sql | 5 +++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp index 9706b1d5562..7d5244f959c 100644 --- a/dbms/src/Storages/StorageBuffer.cpp +++ b/dbms/src/Storages/StorageBuffer.cpp @@ -25,6 +25,8 @@ #include #include +#include +#include namespace ProfileEvents @@ -223,6 +225,20 @@ BlockInputStreams StorageBuffer::read( for (auto & stream : streams_from_buffers) stream = InterpreterSelectQuery(query_info.query, context, stream, processed_stage).execute().in; + if (query_info.prewhere_info) + { + for (auto & stream : streams_from_buffers) + stream = std::make_shared(stream, query_info.prewhere_info->prewhere_actions, + query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column); + + if (query_info.prewhere_info->alias_actions) + { + for (auto & stream : streams_from_buffers) + stream = std::make_shared(stream, query_info.prewhere_info->alias_actions); + + } + } + streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end()); return streams_from_dst; } diff --git a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference index 56a6051ca2b..d00491fd7e5 100644 --- a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference +++ b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference @@ -1 +1 @@ -1 \ No newline at end of 
file +1 diff --git a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql index 43d7735d832..d7d9813c5f5 100644 --- a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql +++ b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql @@ -2,5 +2,6 @@ DROP DATABASE IF EXISTS test_buffer; CREATE DATABASE test_buffer; CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts); CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000); -INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25) -SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00') \ No newline at end of file +INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25); +SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00'); +DROP DATABASE test_buffer; From ef2d77f9463d58f0b013f5a76d5d2601cd871e9f Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 15:08:38 +0300 Subject: [PATCH 13/78] some more refactoring --- dbms/src/Interpreters/Join.cpp | 83 ++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 6079ed5b7fd..8c6b8d1d67c 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -598,6 +598,57 @@ namespace throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); } } + + struct AdditionalColumns + { + using TypeAndNames = std::vector>; + + TypeAndNames type_name; + MutableColumns columns; + + AdditionalColumns(size_t reserve) + { + 
columns.reserve(reserve); + type_name.reserve(reserve); + } + + void add(const ColumnWithTypeAndName & src_column) + { + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name); + } + + ColumnWithTypeAndName moveColumn(size_t i) + { + return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); + } + }; + + AdditionalColumns calcAdditionalColumns(const Block & sample_block_with_columns_to_add, + const Block & block_with_columns_to_add, + const Block & block, + std::vector & right_indexes, size_t num_columns_to_skip) + { + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); + + AdditionalColumns additional_columns(num_columns_to_add); + right_indexes.reserve(num_columns_to_add); + + for (size_t i = 0; i < num_columns_to_add; ++i) + { + const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); + + /// Don't insert column if it's in left block or not explicitly required. + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) + { + additional_columns.add(src_column); + right_indexes.push_back(num_columns_to_skip + i); + } + } + + return additional_columns; + } } @@ -658,38 +709,20 @@ void Join::joinBlockImpl( num_columns_to_skip = keys_size; /// Add new columns to the block. - size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); - MutableColumns added_columns; - added_columns.reserve(num_columns_to_add); - - std::vector> added_type_name; - added_type_name.reserve(num_columns_to_add); std::vector right_indexes; - right_indexes.reserve(num_columns_to_add); - - for (size_t i = 0; i < num_columns_to_add; ++i) - { - const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - - /// Don't insert column if it's in left block or not explicitly required. 
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) - { - added_columns.push_back(src_column.column->cloneEmpty()); - added_columns.back()->reserve(src_column.column->size()); - added_type_name.emplace_back(src_column.type, src_column.name); - right_indexes.push_back(num_columns_to_skip + i); - } - } + AdditionalColumns added = calcAdditionalColumns(sample_block_with_columns_to_add, block_with_columns_to_add, block, + right_indexes, num_columns_to_skip); std::unique_ptr offsets_to_replicate; IColumn::Filter filter = switchJoinRightIndex( - type, maps_, block.rows(), key_columns, key_sizes, added_columns, null_map, right_indexes, offsets_to_replicate); + type, maps_, block.rows(), key_columns, key_sizes, added.columns, null_map, right_indexes, offsets_to_replicate); - const auto added_columns_size = added_columns.size(); - for (size_t i = 0; i < added_columns_size; ++i) - block.insert(ColumnWithTypeAndName(std::move(added_columns[i]), added_type_name[i].first, added_type_name[i].second)); + for (size_t i = 0; i < added.columns.size(); ++i) + block.insert(added.moveColumn(i)); + + /// Filter & insert missing rows NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); From e1f015295b3096dc5ef424cf986a31cb16ced9d2 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 15:28:45 +0300 Subject: [PATCH 14/78] rename functions & add comment --- dbms/src/Interpreters/Join.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 8c6b8d1d67c..a98fa774ee6 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -516,8 +516,10 @@ namespace } } + /// Joins right table columns which indexes are present in right_indexes using specified map. + /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
template - std::unique_ptr NO_INLINE joinRightIndexColumns( + std::unique_ptr NO_INLINE joinRightIndexedColumns( const Map & map, size_t rows, KeyGetter & key_getter, MutableColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, const std::vector & right_indexes) @@ -560,7 +562,7 @@ namespace } template - IColumn::Filter joinRightIndex( + IColumn::Filter joinRightColumns( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, std::unique_ptr & offsets_to_replicate) @@ -569,17 +571,17 @@ namespace KeyGetter key_getter(key_columns, key_sizes, nullptr); if (null_map) - offsets_to_replicate = joinRightIndexColumns( + offsets_to_replicate = joinRightIndexedColumns( map, rows, key_getter, added_columns, null_map, filter, right_indexes); else - offsets_to_replicate = joinRightIndexColumns( + offsets_to_replicate = joinRightIndexedColumns( map, rows, key_getter, added_columns, null_map, filter, right_indexes); return filter; } template - IColumn::Filter switchJoinRightIndex( + IColumn::Filter switchJoinRightColumns( Join::Type type, const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, @@ -589,7 +591,7 @@ namespace { #define M(TYPE) \ case Join::Type::TYPE: \ - return joinRightIndex>::Type>(\ + return joinRightColumns>::Type>(\ *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, right_indexes, offsets_to_replicate); APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -716,7 +718,7 @@ void Join::joinBlockImpl( std::unique_ptr offsets_to_replicate; - IColumn::Filter filter = switchJoinRightIndex( + IColumn::Filter filter = switchJoinRightColumns( type, maps_, block.rows(), key_columns, key_sizes, added.columns, null_map, right_indexes, offsets_to_replicate); for (size_t i = 0; i < 
added.columns.size(); ++i) From 2a4da2f6870ea813f22a441bb39eddc2024bb335 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 16:38:17 +0300 Subject: [PATCH 15/78] more refactoring (rename misleading variable) --- dbms/src/Interpreters/Join.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index a98fa774ee6..773477a7ecc 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -503,11 +503,11 @@ namespace } }; - template + template void addNotFoundRow(const std::vector & right_indexes [[maybe_unused]], MutableColumns & added_columns [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) { - if constexpr (fill_left) + if constexpr (_add_missing) { ++current_offset; @@ -518,14 +518,12 @@ namespace /// Joins right table columns which indexes are present in right_indexes using specified map. /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
- template + template std::unique_ptr NO_INLINE joinRightIndexedColumns( const Map & map, size_t rows, KeyGetter & key_getter, MutableColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, const std::vector & right_indexes) { - constexpr bool fill_left = Join::KindTrait::fill_left; - std::unique_ptr offsets_to_replicate; if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) offsets_to_replicate = std::make_unique(rows); @@ -535,9 +533,9 @@ namespace for (size_t i = 0; i < rows; ++i) { - if (has_null_map && (*null_map)[i]) + if (_has_null_map && (*null_map)[i]) { - addNotFoundRow(right_indexes, added_columns, current_offset); + addNotFoundRow<_add_missing>(right_indexes, added_columns, current_offset); } else { @@ -551,7 +549,7 @@ namespace addFoundRow(mapped, right_indexes, added_columns, current_offset); } else - addNotFoundRow(right_indexes, added_columns, current_offset); + addNotFoundRow<_add_missing>(right_indexes, added_columns, current_offset); } if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) @@ -567,14 +565,16 @@ namespace MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, std::unique_ptr & offsets_to_replicate) { + constexpr bool left_or_full = static_in_v; + IColumn::Filter filter(rows, 0); KeyGetter key_getter(key_columns, key_sizes, nullptr); if (null_map) - offsets_to_replicate = joinRightIndexedColumns( + offsets_to_replicate = joinRightIndexedColumns( map, rows, key_getter, added_columns, null_map, filter, right_indexes); else - offsets_to_replicate = joinRightIndexedColumns( + offsets_to_replicate = joinRightIndexedColumns( map, rows, key_getter, added_columns, null_map, filter, right_indexes); return filter; From 1bb05eb0892d8b4d4781c1cfda51530176753d39 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 17:41:35 +0300 Subject: [PATCH 16/78] more Join.cpp refactoring (hide some logic into AddedColumns) --- dbms/src/Interpreters/Join.cpp | 168 
++++++++++++++++----------------- 1 file changed, 83 insertions(+), 85 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 773477a7ecc..9a290fb0c60 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -476,43 +476,96 @@ bool Join::insertFromBlock(const Block & block) namespace { - template - void addFoundRow(const typename Map::mapped_type & mapped, const std::vector & right_indexes, - MutableColumns & added_columns, IColumn::Offset & current_offset [[maybe_unused]]) + class AddedColumns { - size_t num_columns_to_add = right_indexes.size(); + public: + using TypeAndNames = std::vector>; + AddedColumns(size_t reserve) + { + columns.reserve(reserve); + type_name.reserve(reserve); + right_indexes.reserve(reserve); + } + + size_t size() const { return columns.size(); } + + void add(const ColumnWithTypeAndName & src_column, size_t idx) + { + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name); + right_indexes.push_back(idx); + } + + ColumnWithTypeAndName moveColumn(size_t i) + { + return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); + } + + void appendFromBlock(const Block & block, size_t row_num) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num); + } + + void appendDefaultRow() + { + for (size_t j = 0; j < right_indexes.size(); ++j) + columns[j]->insertDefault(); + } + + private: + TypeAndNames type_name; + MutableColumns columns; + std::vector right_indexes; + }; + + AddedColumns calcAddedColumns(const Block & sample_block_with_columns_to_add, + const Block & block_with_columns_to_add, + const Block & block, size_t num_columns_to_skip) + { + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); + + AddedColumns 
additional_columns(num_columns_to_add); + + for (size_t i = 0; i < num_columns_to_add; ++i) + { + const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); + + /// Don't insert column if it's in left block or not explicitly required. + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) + additional_columns.add(src_column, num_columns_to_skip + i); + } + + return additional_columns; + } + + template + void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset [[maybe_unused]]) + { if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) { - for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertFrom(*mapped.block->getByPosition(right_indexes[j]).column, mapped.row_num); + added.appendFromBlock(*mapped.block, mapped.row_num); } if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) { - size_t rows_joined = 0; for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < num_columns_to_add; ++j) - added_columns[j]->insertFrom(*current->block->getByPosition(right_indexes[j]).column.get(), current->row_num); - - ++rows_joined; + added.appendFromBlock(*current->block, current->row_num); + ++current_offset; } - - current_offset += rows_joined; } }; template - void addNotFoundRow(const std::vector & right_indexes [[maybe_unused]], MutableColumns & added_columns [[maybe_unused]], - IColumn::Offset & current_offset [[maybe_unused]]) + void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) { if constexpr (_add_missing) { + added.appendDefaultRow(); ++current_offset; - - for (size_t j = 0; j < right_indexes.size(); ++j) - added_columns[j]->insertDefault(); } } @@ -521,8 +574,7 @@ namespace template std::unique_ptr NO_INLINE joinRightIndexedColumns( const Map & map, size_t rows, KeyGetter & key_getter, - MutableColumns & 
added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter, - const std::vector & right_indexes) + AddedColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter) { std::unique_ptr offsets_to_replicate; if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) @@ -535,7 +587,7 @@ namespace { if (_has_null_map && (*null_map)[i]) { - addNotFoundRow<_add_missing>(right_indexes, added_columns, current_offset); + addNotFoundRow<_add_missing>(added_columns, current_offset); } else { @@ -546,10 +598,10 @@ namespace filter[i] = 1; auto & mapped = find_result.getMapped(); mapped.setUsed(); - addFoundRow(mapped, right_indexes, added_columns, current_offset); + addFoundRow(mapped, added_columns, current_offset); } else - addNotFoundRow<_add_missing>(right_indexes, added_columns, current_offset); + addNotFoundRow<_add_missing>(added_columns, current_offset); } if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) @@ -562,8 +614,7 @@ namespace template IColumn::Filter joinRightColumns( const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, - std::unique_ptr & offsets_to_replicate) + AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) { constexpr bool left_or_full = static_in_v; @@ -572,10 +623,10 @@ namespace if (null_map) offsets_to_replicate = joinRightIndexedColumns( - map, rows, key_getter, added_columns, null_map, filter, right_indexes); + map, rows, key_getter, added_columns, null_map, filter); else offsets_to_replicate = joinRightIndexedColumns( - map, rows, key_getter, added_columns, null_map, filter, right_indexes); + map, rows, key_getter, added_columns, null_map, filter); return filter; } @@ -584,7 +635,7 @@ namespace IColumn::Filter switchJoinRightColumns( Join::Type type, const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, 
- MutableColumns & added_columns, ConstNullMapPtr null_map, const std::vector & right_indexes, + AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) { switch (type) @@ -592,7 +643,7 @@ namespace #define M(TYPE) \ case Join::Type::TYPE: \ return joinRightColumns>::Type>(\ - *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, right_indexes, offsets_to_replicate); + *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, offsets_to_replicate); APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -600,57 +651,6 @@ namespace throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); } } - - struct AdditionalColumns - { - using TypeAndNames = std::vector>; - - TypeAndNames type_name; - MutableColumns columns; - - AdditionalColumns(size_t reserve) - { - columns.reserve(reserve); - type_name.reserve(reserve); - } - - void add(const ColumnWithTypeAndName & src_column) - { - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name); - } - - ColumnWithTypeAndName moveColumn(size_t i) - { - return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); - } - }; - - AdditionalColumns calcAdditionalColumns(const Block & sample_block_with_columns_to_add, - const Block & block_with_columns_to_add, - const Block & block, - std::vector & right_indexes, size_t num_columns_to_skip) - { - size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); - - AdditionalColumns additional_columns(num_columns_to_add); - right_indexes.reserve(num_columns_to_add); - - for (size_t i = 0; i < num_columns_to_add; ++i) - { - const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - - /// Don't insert column if it's in left block or not explicitly required. 
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) - { - additional_columns.add(src_column); - right_indexes.push_back(num_columns_to_skip + i); - } - } - - return additional_columns; - } } @@ -712,16 +712,14 @@ void Join::joinBlockImpl( /// Add new columns to the block. - std::vector right_indexes; - AdditionalColumns added = calcAdditionalColumns(sample_block_with_columns_to_add, block_with_columns_to_add, block, - right_indexes, num_columns_to_skip); + AddedColumns added = calcAddedColumns(sample_block_with_columns_to_add, block_with_columns_to_add, block, num_columns_to_skip); std::unique_ptr offsets_to_replicate; IColumn::Filter filter = switchJoinRightColumns( - type, maps_, block.rows(), key_columns, key_sizes, added.columns, null_map, right_indexes, offsets_to_replicate); + type, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); - for (size_t i = 0; i < added.columns.size(); ++i) + for (size_t i = 0; i < added.size(); ++i) block.insert(added.moveColumn(i)); /// Filter & insert missing rows From 2713d36ce81e16292472409d8c4787a0aa639740 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 17:49:05 +0300 Subject: [PATCH 17/78] syntax: remove tab --- dbms/src/Interpreters/Join.cpp | 328 +++++++++++++++++---------------- 1 file changed, 165 insertions(+), 163 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 9a290fb0c60..a9f13429bb5 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -476,183 +476,185 @@ bool Join::insertFromBlock(const Block & block) namespace { - class AddedColumns + +class AddedColumns +{ +public: + using TypeAndNames = std::vector>; + + AddedColumns(size_t reserve) { - public: - using TypeAndNames = std::vector>; - - AddedColumns(size_t reserve) - { - columns.reserve(reserve); - type_name.reserve(reserve); - right_indexes.reserve(reserve); - } - - size_t size() const { return 
columns.size(); } - - void add(const ColumnWithTypeAndName & src_column, size_t idx) - { - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name); - right_indexes.push_back(idx); - } - - ColumnWithTypeAndName moveColumn(size_t i) - { - return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); - } - - void appendFromBlock(const Block & block, size_t row_num) - { - for (size_t j = 0; j < right_indexes.size(); ++j) - columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num); - } - - void appendDefaultRow() - { - for (size_t j = 0; j < right_indexes.size(); ++j) - columns[j]->insertDefault(); - } - - private: - TypeAndNames type_name; - MutableColumns columns; - std::vector right_indexes; - }; - - AddedColumns calcAddedColumns(const Block & sample_block_with_columns_to_add, - const Block & block_with_columns_to_add, - const Block & block, size_t num_columns_to_skip) - { - size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); - - AddedColumns additional_columns(num_columns_to_add); - - for (size_t i = 0; i < num_columns_to_add; ++i) - { - const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - - /// Don't insert column if it's in left block or not explicitly required. 
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) - additional_columns.add(src_column, num_columns_to_skip + i); - } - - return additional_columns; + columns.reserve(reserve); + type_name.reserve(reserve); + right_indexes.reserve(reserve); } - template - void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset [[maybe_unused]]) - { - if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) - { - added.appendFromBlock(*mapped.block, mapped.row_num); - } + size_t size() const { return columns.size(); } - if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) - { - for (auto current = &static_cast(mapped); current != nullptr; current = current->next) - { - added.appendFromBlock(*current->block, current->row_num); - ++current_offset; - } - } - }; - - template - void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) + void add(const ColumnWithTypeAndName & src_column, size_t idx) { - if constexpr (_add_missing) + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name); + right_indexes.push_back(idx); + } + + ColumnWithTypeAndName moveColumn(size_t i) + { + return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); + } + + void appendFromBlock(const Block & block, size_t row_num) + { + for (size_t j = 0; j < right_indexes.size(); ++j) + columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num); + } + + void appendDefaultRow() + { + for (size_t j = 0; j < right_indexes.size(); ++j) + columns[j]->insertDefault(); + } + +private: + TypeAndNames type_name; + MutableColumns columns; + std::vector right_indexes; +}; + +AddedColumns calcAddedColumns(const Block & sample_block_with_columns_to_add, + const Block & block_with_columns_to_add, + const Block & block, 
size_t num_columns_to_skip) +{ + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); + + AddedColumns additional_columns(num_columns_to_add); + + for (size_t i = 0; i < num_columns_to_add; ++i) + { + const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); + + /// Don't insert column if it's in left block or not explicitly required. + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) + additional_columns.add(src_column, num_columns_to_skip + i); + } + + return additional_columns; +} + +template +void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset [[maybe_unused]]) +{ + if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) + { + added.appendFromBlock(*mapped.block, mapped.row_num); + } + + if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) + { + for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - added.appendDefaultRow(); + added.appendFromBlock(*current->block, current->row_num); ++current_offset; } } +}; - /// Joins right table columns which indexes are present in right_indexes using specified map. - /// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). 
- template - std::unique_ptr NO_INLINE joinRightIndexedColumns( - const Map & map, size_t rows, KeyGetter & key_getter, - AddedColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter) +template +void addNotFoundRow(AddedColumns & added [[maybe_unused]], IColumn::Offset & current_offset [[maybe_unused]]) +{ + if constexpr (_add_missing) { - std::unique_ptr offsets_to_replicate; - if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) - offsets_to_replicate = std::make_unique(rows); - - IColumn::Offset current_offset = 0; - Arena pool; - - for (size_t i = 0; i < rows; ++i) - { - if (_has_null_map && (*null_map)[i]) - { - addNotFoundRow<_add_missing>(added_columns, current_offset); - } - else - { - auto find_result = key_getter.findKey(map, i, pool); - - if (find_result.isFound()) - { - filter[i] = 1; - auto & mapped = find_result.getMapped(); - mapped.setUsed(); - addFoundRow(mapped, added_columns, current_offset); - } - else - addNotFoundRow<_add_missing>(added_columns, current_offset); - } - - if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) - (*offsets_to_replicate)[i] = current_offset; - } - - return offsets_to_replicate; - } - - template - IColumn::Filter joinRightColumns( - const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) - { - constexpr bool left_or_full = static_in_v; - - IColumn::Filter filter(rows, 0); - KeyGetter key_getter(key_columns, key_sizes, nullptr); - - if (null_map) - offsets_to_replicate = joinRightIndexedColumns( - map, rows, key_getter, added_columns, null_map, filter); - else - offsets_to_replicate = joinRightIndexedColumns( - map, rows, key_getter, added_columns, null_map, filter); - - return filter; - } - - template - IColumn::Filter switchJoinRightColumns( - Join::Type type, - const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, - 
AddedColumns & added_columns, ConstNullMapPtr null_map, - std::unique_ptr & offsets_to_replicate) - { - switch (type) - { - #define M(TYPE) \ - case Join::Type::TYPE: \ - return joinRightColumns>::Type>(\ - *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, offsets_to_replicate); - APPLY_FOR_JOIN_VARIANTS(M) - #undef M - - default: - throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); - } + added.appendDefaultRow(); + ++current_offset; } } +/// Joins right table columns which indexes are present in right_indexes using specified map. +/// Makes filter (1 if row presented in right table) and returns offsets to replicate (for ALL JOINS). +template +std::unique_ptr NO_INLINE joinRightIndexedColumns( + const Map & map, size_t rows, KeyGetter & key_getter, + AddedColumns & added_columns, ConstNullMapPtr null_map, IColumn::Filter & filter) +{ + std::unique_ptr offsets_to_replicate; + if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) + offsets_to_replicate = std::make_unique(rows); + + IColumn::Offset current_offset = 0; + Arena pool; + + for (size_t i = 0; i < rows; ++i) + { + if (_has_null_map && (*null_map)[i]) + { + addNotFoundRow<_add_missing>(added_columns, current_offset); + } + else + { + auto find_result = key_getter.findKey(map, i, pool); + + if (find_result.isFound()) + { + filter[i] = 1; + auto & mapped = find_result.getMapped(); + mapped.setUsed(); + addFoundRow(mapped, added_columns, current_offset); + } + else + addNotFoundRow<_add_missing>(added_columns, current_offset); + } + + if constexpr (STRICTNESS == ASTTableJoin::Strictness::All) + (*offsets_to_replicate)[i] = current_offset; + } + + return offsets_to_replicate; +} + +template +IColumn::Filter joinRightColumns( + const Map & map, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + AddedColumns & added_columns, ConstNullMapPtr null_map, std::unique_ptr & offsets_to_replicate) +{ + constexpr bool left_or_full = 
static_in_v; + + IColumn::Filter filter(rows, 0); + KeyGetter key_getter(key_columns, key_sizes, nullptr); + + if (null_map) + offsets_to_replicate = joinRightIndexedColumns( + map, rows, key_getter, added_columns, null_map, filter); + else + offsets_to_replicate = joinRightIndexedColumns( + map, rows, key_getter, added_columns, null_map, filter); + + return filter; +} + +template +IColumn::Filter switchJoinRightColumns( + Join::Type type, + const Maps & maps_, size_t rows, const ColumnRawPtrs & key_columns, const Sizes & key_sizes, + AddedColumns & added_columns, ConstNullMapPtr null_map, + std::unique_ptr & offsets_to_replicate) +{ + switch (type) + { + #define M(TYPE) \ + case Join::Type::TYPE: \ + return joinRightColumns>::Type>(\ + *maps_.TYPE, rows, key_columns, key_sizes, added_columns, null_map, offsets_to_replicate); + APPLY_FOR_JOIN_VARIANTS(M) + #undef M + + default: + throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_SET_DATA_VARIANT); + } +} + +} /// nameless + template void Join::joinBlockImpl( From efa4a2bb1810e740222a769f8f44612fd3f50529 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 18:15:44 +0300 Subject: [PATCH 18/78] minor changes --- dbms/src/Interpreters/Join.cpp | 55 +++++++++++++++------------------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index a9f13429bb5..649374494f3 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -482,23 +482,28 @@ class AddedColumns public: using TypeAndNames = std::vector>; - AddedColumns(size_t reserve) + AddedColumns(const Block & sample_block_with_columns_to_add, + const Block & block_with_columns_to_add, + const Block & block, size_t num_columns_to_skip) { - columns.reserve(reserve); - type_name.reserve(reserve); - right_indexes.reserve(reserve); + size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); + + columns.reserve(num_columns_to_add); + 
type_name.reserve(num_columns_to_add); + right_indexes.reserve(num_columns_to_add); + + for (size_t i = 0; i < num_columns_to_add; ++i) + { + const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); + + /// Don't insert column if it's in left block or not explicitly required. + if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) + addColumn(src_column, num_columns_to_skip + i); + } } size_t size() const { return columns.size(); } - void add(const ColumnWithTypeAndName & src_column, size_t idx) - { - columns.push_back(src_column.column->cloneEmpty()); - columns.back()->reserve(src_column.column->size()); - type_name.emplace_back(src_column.type, src_column.name); - right_indexes.push_back(idx); - } - ColumnWithTypeAndName moveColumn(size_t i) { return ColumnWithTypeAndName(std::move(columns[i]), type_name[i].first, type_name[i].second); @@ -520,27 +525,15 @@ private: TypeAndNames type_name; MutableColumns columns; std::vector right_indexes; -}; -AddedColumns calcAddedColumns(const Block & sample_block_with_columns_to_add, - const Block & block_with_columns_to_add, - const Block & block, size_t num_columns_to_skip) -{ - size_t num_columns_to_add = sample_block_with_columns_to_add.columns(); - - AddedColumns additional_columns(num_columns_to_add); - - for (size_t i = 0; i < num_columns_to_add; ++i) + void addColumn(const ColumnWithTypeAndName & src_column, size_t idx) { - const ColumnWithTypeAndName & src_column = sample_block_with_columns_to_add.safeGetByPosition(i); - - /// Don't insert column if it's in left block or not explicitly required. 
- if (!block.has(src_column.name) && block_with_columns_to_add.has(src_column.name)) - additional_columns.add(src_column, num_columns_to_skip + i); + columns.push_back(src_column.column->cloneEmpty()); + columns.back()->reserve(src_column.column->size()); + type_name.emplace_back(src_column.type, src_column.name); + right_indexes.push_back(idx); } - - return additional_columns; -} +}; template void addFoundRow(const typename Map::mapped_type & mapped, AddedColumns & added, IColumn::Offset & current_offset [[maybe_unused]]) @@ -714,7 +707,7 @@ void Join::joinBlockImpl( /// Add new columns to the block. - AddedColumns added = calcAddedColumns(sample_block_with_columns_to_add, block_with_columns_to_add, block, num_columns_to_skip); + AddedColumns added(sample_block_with_columns_to_add, block_with_columns_to_add, block, num_columns_to_skip); std::unique_ptr offsets_to_replicate; From 2ea11f8528f77ec34e884cdf93a69cab93589b27 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 18:52:41 +0300 Subject: [PATCH 19/78] more Joins.cpp refactoring (constexpr if) --- dbms/src/Interpreters/Join.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 649374494f3..bafb98fd703 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -682,7 +682,8 @@ void Join::joinBlockImpl( * Because if they are constants, then in the "not joined" rows, they may have different values * - default values, which can differ from the values of these constants. */ - if (isRightOrFull(kind)) + constexpr bool right_or_full = static_in_v; + if constexpr (right_or_full) { for (size_t i = 0; i < existing_columns; ++i) { @@ -702,7 +703,7 @@ void Join::joinBlockImpl( * but they will not be used at this stage of joining (and will be in `AdderNonJoined`), and they need to be skipped. 
*/ size_t num_columns_to_skip = 0; - if (isRightOrFull(kind)) + if constexpr (right_or_full) num_columns_to_skip = keys_size; /// Add new columns to the block. @@ -721,9 +722,10 @@ void Join::joinBlockImpl( NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); - if (strictness == ASTTableJoin::Strictness::Any) + if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) { - if (isInnerOrRight(kind)) + constexpr bool inner_or_right = static_in_v; + if constexpr (inner_or_right) { /// If ANY INNER | RIGHT JOIN - filter all the columns except the new ones. for (size_t i = 0; i < existing_columns; ++i) From b1f4cc6ed24fca22159efe7c199207329c2f68da Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 20 Mar 2019 20:20:14 +0300 Subject: [PATCH 20/78] fix left join with const column --- dbms/src/Interpreters/Join.cpp | 20 ++++++++-------- ...916_join_using_duplicate_columns.reference | 15 ++++++++++++ .../00916_join_using_duplicate_columns.sql | 23 ++++++++++++++++++- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index bafb98fd703..3336f175ebe 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -755,13 +755,13 @@ void Join::joinBlockImpl( if (needed_key_names_right.count(right_name) && !block.has(right_name)) { const auto & col = block.getByName(left_name); - auto & column = col.column; + ColumnPtr column = col.column->convertToFullColumnIfConst(); MutableColumnPtr mut_column = column->cloneEmpty(); - for (size_t col_no = 0; col_no < filter.size(); ++col_no) + for (size_t row = 0; row < filter.size(); ++row) { - if (filter[col_no]) - mut_column->insertFrom(*column, col_no); + if (filter[row]) + mut_column->insertFrom(*column, row); else mut_column->insertDefault(); } @@ -785,22 +785,22 @@ void Join::joinBlockImpl( if (needed_key_names_right.count(right_name) && !block.has(right_name)) { const auto & col = 
block.getByName(left_name); - auto & column = col.column; + ColumnPtr column = col.column->convertToFullColumnIfConst(); MutableColumnPtr mut_column = column->cloneEmpty(); size_t last_offset = 0; - for (size_t col_no = 0; col_no < column->size(); ++col_no) + for (size_t row = 0; row < column->size(); ++row) { - if (size_t to_insert = (*offsets_to_replicate)[col_no] - last_offset) + if (size_t to_insert = (*offsets_to_replicate)[row] - last_offset) { - if (!filter[col_no]) + if (!filter[row]) mut_column->insertDefault(); else for (size_t dup = 0; dup < to_insert; ++dup) - mut_column->insertFrom(*column, col_no); + mut_column->insertFrom(*column, row); } - last_offset = (*offsets_to_replicate)[col_no]; + last_offset = (*offsets_to_replicate)[row]; } block.insert({std::move(mut_column), col.type, right_name}); diff --git a/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.reference b/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.reference index be9a5a74a14..1e896055e15 100644 --- a/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.reference +++ b/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.reference @@ -3,3 +3,18 @@ 1 1 1 1 +1 0 +1 0 +1 0 +1 1 +1 0 +1 0 +1 0 +1 1 +0 2 +0 2 +0 2 +1 1 +0 2 +0 2 +0 2 diff --git a/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.sql b/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.sql index 97cd1e8cac8..a30aee7f61f 100644 --- a/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.sql +++ b/dbms/tests/queries/0_stateless/00916_join_using_duplicate_columns.sql @@ -5,4 +5,25 @@ SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT 1 AS x) AS t2 USING x; SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT 2 AS x) AS t2 USING x; SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT 1 AS x) AS t2 ON t1.x = t2.x; --- (bug) SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; 
+SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT materialize(1) AS x) AS t1 ALL LEFT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT materialize(2) AS x) AS t2 ON t1.x = t2.x; + +SELECT * FROM (SELECT 1 AS x) AS t1 ANY LEFT JOIN (SELECT 1 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ANY LEFT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT materialize(1) AS x) AS t1 ANY LEFT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ANY LEFT JOIN (SELECT materialize(2) AS x) AS t2 ON t1.x = t2.x; + +SELECT * FROM (SELECT 1 AS x) AS t1 ALL RIGHT JOIN (SELECT 1 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ALL RIGHT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT materialize(1) AS x) AS t1 ALL RIGHT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ALL RIGHT JOIN (SELECT materialize(2) AS x) AS t2 ON t1.x = t2.x; + +SELECT * FROM (SELECT 1 AS x) AS t1 ANY RIGHT JOIN (SELECT 1 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ANY RIGHT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT materialize(1) AS x) AS t1 ANY RIGHT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +SELECT * FROM (SELECT 1 AS x) AS t1 ANY RIGHT JOIN (SELECT materialize(2) AS x) AS t2 ON t1.x = t2.x; + +-- SET join_use_nulls = 1; +-- SELECT * FROM (SELECT 1 AS x) AS t1 ALL LEFT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; +-- SELECT * FROM (SELECT 1 AS x) AS t1 ALL RIGHT JOIN (SELECT 2 AS x) AS t2 ON t1.x = t2.x; From 001de1a340deab9164c40603c90e002b486aedb6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 24 Mar 2019 02:01:57 +0300 Subject: [PATCH 21/78] Always backquote column names in CREATE/ATTACH queries --- dbms/src/Parsers/ASTColumnDeclaration.cpp | 4 +++- dbms/src/Parsers/IAST.cpp | 11 ++++++++++- dbms/src/Parsers/IAST.h 
| 4 +++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dbms/src/Parsers/ASTColumnDeclaration.cpp b/dbms/src/Parsers/ASTColumnDeclaration.cpp index abc0ad7076f..bf8cf8e0861 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.cpp +++ b/dbms/src/Parsers/ASTColumnDeclaration.cpp @@ -41,7 +41,9 @@ void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatSta frame.need_parens = false; std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); - settings.ostr << settings.nl_or_ws << indent_str << backQuoteIfNeed(name); + /// We have to always backquote column names to avoid ambiguity with INDEX and other declarations in CREATE query. + settings.ostr << settings.nl_or_ws << indent_str << backQuote(name); + if (type) { settings.ostr << ' '; diff --git a/dbms/src/Parsers/IAST.cpp b/dbms/src/Parsers/IAST.cpp index 74956c43166..b2014cc0f44 100644 --- a/dbms/src/Parsers/IAST.cpp +++ b/dbms/src/Parsers/IAST.cpp @@ -25,7 +25,6 @@ const char * IAST::hilite_alias = "\033[0;32m"; const char * IAST::hilite_none = "\033[0m"; -/// Quote the identifier with backquotes, if required. String backQuoteIfNeed(const String & x) { String res(x.size(), '\0'); @@ -36,6 +35,16 @@ String backQuoteIfNeed(const String & x) return res; } +String backQuote(const String & x) +{ + String res(x.size(), '\0'); + { + WriteBufferFromString wb(res); + writeBackQuotedString(x, wb); + } + return res; +} + size_t IAST::checkSize(size_t max_size) const { diff --git a/dbms/src/Parsers/IAST.h b/dbms/src/Parsers/IAST.h index 65572d922d2..89ab8fb05c3 100644 --- a/dbms/src/Parsers/IAST.h +++ b/dbms/src/Parsers/IAST.h @@ -208,7 +208,9 @@ private: }; -/// Surrounds an identifier by back quotes if it is necessary. +/// Quote the identifier with backquotes, if required. String backQuoteIfNeed(const String & x); +/// Quote the identifier with backquotes. 
+String backQuote(const String & x); } From 2dca389f20ffeb1821749b4e2e282804153b8409 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 24 Mar 2019 04:42:58 +0300 Subject: [PATCH 22/78] Attempt to make compositions with COWPtr more convenient --- dbms/src/Columns/IColumn.h | 4 +- dbms/src/Common/COWPtr.h | 70 +++++++++----- dbms/src/Common/tests/CMakeLists.txt | 3 + dbms/src/Common/tests/cow_columns.cpp | 4 +- dbms/src/Common/tests/cow_compositions.cpp | 106 +++++++++++++++++++++ 5 files changed, 160 insertions(+), 27 deletions(-) create mode 100644 dbms/src/Common/tests/cow_compositions.cpp diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index b7df53ed148..e98e406368d 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -272,8 +272,8 @@ public: MutablePtr mutate() const && { - MutablePtr res = COWPtr::mutate(); - res->forEachSubcolumn([](Ptr & subcolumn) { subcolumn = (*std::move(subcolumn)).mutate(); }); + MutablePtr res = std::move(*this).template COWPtr::mutate(); + res->forEachSubcolumn([](Ptr & subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); return res; } diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 525f2372c6c..4ec63c5744b 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -50,7 +50,7 @@ /// Change value of x. { /// Creating mutable ptr. It can clone an object under the hood if it was shared. - Column::MutablePtr mutate_x = x->mutate(); + Column::MutablePtr mutate_x = std::move(*x).mutate(); /// Using non-const methods of an object. mutate_x->set(2); /// Assigning pointer 'x' to mutated object. 
@@ -175,7 +175,7 @@ public: Ptr getPtr() const { return static_cast(derived()); } MutablePtr getPtr() { return static_cast(derived()); } - MutablePtr mutate() const + MutablePtr mutate() const && { if (this->use_count() > 1) return derived()->clone(); @@ -192,6 +192,49 @@ public: { return const_cast(*derived()); } + +protected: + /// It works as immutable_ptr if it is const and as mutable_ptr if it is non const. + template + class chameleon_ptr + { + private: + immutable_ptr value; + + public: + template + chameleon_ptr(Args &&... args) : value(std::forward(args)...) {} + + template + chameleon_ptr(std::initializer_list && arg) : value(std::forward>(arg)) {} + + const T * get() const { return value.get(); } + T * get() { return value->assumeMutable().get(); } + + const T * operator->() const { return get(); } + T * operator->() { return get(); } + + const T & operator*() const { return *value; } + T & operator*() { return value->assumeMutableRef(); } + + operator bool() const { return value; } + }; + +public: + /** Use this type in class members for compositions. + * + * NOTE: + * For classes with WrappedPtr members, + * you must reimplement 'mutate' method, so it will call 'mutate' of all subobjects (do deep mutate). + * It will guarantee, that mutable object have all subobjects unshared. + * + * NOTE: + * If you override 'mutate' method in inherited classes, don't forget to make it virtual in base class. + * (COWPtr itself doesn't force any methods to be virtual). + * + * See example in "cow_compositions.cpp". + */ + using WrappedPtr = chameleon_ptr; }; @@ -217,6 +260,8 @@ public: * IColumn * CowPtr * boost::intrusive_ref_counter + * + * See example in "cow_columns.cpp". */ template class COWPtrHelper : public Base @@ -237,24 +282,3 @@ public: typename Base::MutablePtr clone() const override { return typename Base::MutablePtr(new Derived(*derived())); } }; - - -/** Compositions. 
- * - * Sometimes your objects contain another objects, and you have tree-like structure. - * And you want non-const methods of your object to also modify your subobjects. - * - * There are the following possible solutions: - * - * 1. Store subobjects as immutable ptrs. Call mutate method of subobjects inside non-const methods of your objects; modify them and assign back. - * Drawback: additional checks inside methods: CPU overhead on atomic ops. - * - * 2. Store subobjects as mutable ptrs. Subobjects cannot be shared in another objects. - * Drawback: it's not possible to share subobjects. - * - * 3. Store subobjects as immutable ptrs. Implement copy-constructor to do shallow copy. - * But reimplement 'mutate' method, so it will call 'mutate' of all subobjects (do deep mutate). - * It will guarantee, that mutable object have all subobjects unshared. - * From non-const method, you can modify subobjects with 'assumeMutableRef' method. - * Drawback: it's more complex than other solutions. - */ diff --git a/dbms/src/Common/tests/CMakeLists.txt b/dbms/src/Common/tests/CMakeLists.txt index 5d995c17bf3..1c6c7e9f504 100644 --- a/dbms/src/Common/tests/CMakeLists.txt +++ b/dbms/src/Common/tests/CMakeLists.txt @@ -82,5 +82,8 @@ target_link_libraries (allocator PRIVATE clickhouse_common_io) add_executable (cow_columns cow_columns.cpp) target_link_libraries (cow_columns PRIVATE clickhouse_common_io) +add_executable (cow_compositions cow_compositions.cpp) +target_link_libraries (cow_compositions PRIVATE clickhouse_common_io) + add_executable (stopwatch stopwatch.cpp) target_link_libraries (stopwatch PRIVATE clickhouse_common_io) diff --git a/dbms/src/Common/tests/cow_columns.cpp b/dbms/src/Common/tests/cow_columns.cpp index 5d123f3c8af..cf48c159a96 100644 --- a/dbms/src/Common/tests/cow_columns.cpp +++ b/dbms/src/Common/tests/cow_columns.cpp @@ -53,7 +53,7 @@ int main(int, char **) std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; { - MutableColumnPtr mut = 
y->mutate(); + MutableColumnPtr mut = std::move(*y).mutate(); mut->set(2); std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; @@ -72,7 +72,7 @@ int main(int, char **) std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; { - MutableColumnPtr mut = y->mutate(); + MutableColumnPtr mut = std::move(*y).mutate(); mut->set(3); std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; diff --git a/dbms/src/Common/tests/cow_compositions.cpp b/dbms/src/Common/tests/cow_compositions.cpp new file mode 100644 index 00000000000..31e9451b8b7 --- /dev/null +++ b/dbms/src/Common/tests/cow_compositions.cpp @@ -0,0 +1,106 @@ +#include +#include + + +class IColumn : public COWPtr +{ +private: + friend class COWPtr; + virtual MutablePtr clone() const = 0; + +public: + IColumn() = default; + IColumn(const IColumn &) = default; + virtual ~IColumn() = default; + + virtual int get() const = 0; + virtual void set(int value) = 0; + + virtual MutablePtr mutate() const && { return std::move(*this).template COWPtr::mutate(); } +}; + +using ColumnPtr = IColumn::Ptr; +using MutableColumnPtr = IColumn::MutablePtr; + +class ConcreteColumn : public COWPtrHelper +{ +private: + friend class COWPtrHelper; + + int data; + ConcreteColumn(int data) : data(data) {} + ConcreteColumn(const ConcreteColumn &) = default; + +public: + int get() const override { return data; } + void set(int value) override { data = value; } +}; + +class ColumnComposition : public COWPtrHelper +{ +private: + using Base = COWPtrHelper; + friend class COWPtrHelper; + + ConcreteColumn::WrappedPtr wrapped; + + ColumnComposition(int data) : wrapped(ConcreteColumn::create(data)) {} + ColumnComposition(const ColumnComposition &) = default; + +public: + int get() const override { return wrapped->get(); } + void set(int value) override { wrapped->set(value); } + + IColumn::MutablePtr mutate() const && override + { + 
std::cerr << "Mutating\n"; + auto res = std::move(*this).Base::mutate(); + static_cast(res.get())->wrapped = std::move(*wrapped).mutate(); + return res; + } +}; + + +int main(int, char **) +{ + ColumnPtr x = ColumnComposition::create(1); + ColumnPtr y = x; + + std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; + + { + MutableColumnPtr mut = std::move(*y).mutate(); + mut->set(2); + + std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; + y = std::move(mut); + } + + std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; + + x = ColumnComposition::create(0); + + std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << y.get() << "\n"; + + { + MutableColumnPtr mut = std::move(*y).mutate(); + mut->set(3); + + std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << ", " << mut->use_count() << "\n"; + std::cerr << "addresses: " << x.get() << ", " << y.get() << ", " << mut.get() << "\n"; + y = std::move(mut); + } + + std::cerr << "values: " << x->get() << ", " << y->get() << "\n"; + std::cerr << "refcounts: " << x->use_count() << ", " << y->use_count() << "\n"; + + return 0; +} + From c76a4e9dd2fbb4d7add9a74f77b9b1852069498f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 24 Mar 2019 05:21:01 +0300 Subject: [PATCH 23/78] Attempt to make compositions with COWPtr more convenient --- dbms/src/Columns/IColumn.h | 2 +- dbms/src/Common/COWPtr.h | 12 
+++++++++++- dbms/src/Common/tests/cow_compositions.cpp | 21 +++++++++++---------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index e98e406368d..bd5b5aa0e14 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -272,7 +272,7 @@ public: MutablePtr mutate() const && { - MutablePtr res = std::move(*this).template COWPtr::mutate(); + MutablePtr res = shallowMutate(); res->forEachSubcolumn([](Ptr & subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); return res; } diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 4ec63c5744b..8017df82cb1 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -175,7 +175,8 @@ public: Ptr getPtr() const { return static_cast(derived()); } MutablePtr getPtr() { return static_cast(derived()); } - MutablePtr mutate() const && +protected: + MutablePtr shallowMutate() const { if (this->use_count() > 1) return derived()->clone(); @@ -183,6 +184,12 @@ public: return assumeMutable(); } +public: + MutablePtr mutate() const && + { + return shallowMutate(); + } + MutablePtr assumeMutable() const { return const_cast(this)->getPtr(); @@ -281,4 +288,7 @@ public: static MutablePtr create(std::initializer_list && arg) { return create(std::forward>(arg)); } typename Base::MutablePtr clone() const override { return typename Base::MutablePtr(new Derived(*derived())); } + +protected: + MutablePtr shallowMutate() const { return MutablePtr(static_cast(Base::shallowMutate().get())); } }; diff --git a/dbms/src/Common/tests/cow_compositions.cpp b/dbms/src/Common/tests/cow_compositions.cpp index 31e9451b8b7..76b8a2987ab 100644 --- a/dbms/src/Common/tests/cow_compositions.cpp +++ b/dbms/src/Common/tests/cow_compositions.cpp @@ -6,7 +6,9 @@ class IColumn : public COWPtr { private: friend class COWPtr; + virtual MutablePtr clone() const = 0; + virtual MutablePtr deepMutate() const { return shallowMutate(); } public: IColumn() 
= default; @@ -16,7 +18,7 @@ public: virtual int get() const = 0; virtual void set(int value) = 0; - virtual MutablePtr mutate() const && { return std::move(*this).template COWPtr::mutate(); } + MutablePtr mutate() const && { return deepMutate(); } }; using ColumnPtr = IColumn::Ptr; @@ -39,7 +41,6 @@ public: class ColumnComposition : public COWPtrHelper { private: - using Base = COWPtrHelper; friend class COWPtrHelper; ConcreteColumn::WrappedPtr wrapped; @@ -47,17 +48,17 @@ private: ColumnComposition(int data) : wrapped(ConcreteColumn::create(data)) {} ColumnComposition(const ColumnComposition &) = default; + IColumn::MutablePtr deepMutate() const override + { + std::cerr << "Mutating\n"; + auto res = shallowMutate(); + res->wrapped = std::move(*wrapped).mutate(); + return res; + } + public: int get() const override { return wrapped->get(); } void set(int value) override { wrapped->set(value); } - - IColumn::MutablePtr mutate() const && override - { - std::cerr << "Mutating\n"; - auto res = std::move(*this).Base::mutate(); - static_cast(res.get())->wrapped = std::move(*wrapped).mutate(); - return res; - } }; From af5193736ac2339688b1a4c0782c333a7ff4ebcf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 24 Mar 2019 05:22:39 +0300 Subject: [PATCH 24/78] Attempt to make compositions with COWPtr more convenient --- dbms/src/Common/COWPtr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 8017df82cb1..bc10757cea4 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -236,7 +236,7 @@ public: * It will guarantee, that mutable object have all subobjects unshared. * * NOTE: - * If you override 'mutate' method in inherited classes, don't forget to make it virtual in base class. + * If you override 'mutate' method in inherited classes, don't forget to make it virtual in base class or to make it call a virtual method. * (COWPtr itself doesn't force any methods to be virtual). 
* * See example in "cow_compositions.cpp". From fb19cf1eec0b524578d373941483d038eee65879 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 25 Mar 2019 04:43:54 +0300 Subject: [PATCH 25/78] Attempt to make compositions with COWPtr more convenient --- dbms/src/Columns/ColumnArray.cpp | 4 +-- dbms/src/Columns/ColumnArray.h | 10 +++---- dbms/src/Columns/ColumnConst.h | 11 +++----- dbms/src/Columns/ColumnLowCardinality.cpp | 4 +-- dbms/src/Columns/ColumnLowCardinality.h | 18 ++++++------ dbms/src/Columns/ColumnNullable.h | 9 +++--- dbms/src/Columns/ColumnTuple.cpp | 28 +++++++++---------- dbms/src/Columns/ColumnTuple.h | 9 ++++-- dbms/src/Columns/ColumnUnique.h | 15 ++++------ dbms/src/Columns/IColumn.h | 4 +-- dbms/src/Common/COWPtr.h | 5 +++- dbms/src/DataStreams/ColumnGathererStream.cpp | 7 ++--- .../DataTypeLowCardinalityHelpers.cpp | 4 +-- dbms/src/DataTypes/NestedUtils.cpp | 2 +- dbms/src/Functions/FunctionHelpers.cpp | 2 +- dbms/src/Functions/FunctionsComparison.h | 4 +-- .../Functions/FunctionsExternalDictionaries.h | 10 +++---- dbms/src/Functions/FunctionsGeo.cpp | 2 +- dbms/src/Functions/FunctionsHashing.h | 4 +-- .../Functions/GatherUtils/createArraySink.cpp | 3 +- dbms/src/Functions/arrayElement.cpp | 2 +- dbms/src/Functions/arrayIntersect.cpp | 2 +- dbms/src/Functions/if.cpp | 4 +-- dbms/src/Functions/in.cpp | 2 +- dbms/src/Storages/StorageJoin.cpp | 2 +- 25 files changed, 80 insertions(+), 87 deletions(-) diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp index eeb06b64f49..63f01bf1a59 100644 --- a/dbms/src/Columns/ColumnArray.cpp +++ b/dbms/src/Columns/ColumnArray.cpp @@ -576,7 +576,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint /// Make temporary arrays for each components of Tuple, then filter and collect back. 
- size_t tuple_size = tuple.getColumns().size(); + size_t tuple_size = tuple.tupleSize(); if (tuple_size == 0) throw Exception("Logical error: empty tuple", ErrorCodes::LOGICAL_ERROR); @@ -941,7 +941,7 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const /// Make temporary arrays for each components of Tuple. In the same way as for Nullable. - size_t tuple_size = tuple.getColumns().size(); + size_t tuple_size = tuple.tupleSize(); if (tuple_size == 0) throw Exception("Logical error: empty tuple", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Columns/ColumnArray.h b/dbms/src/Columns/ColumnArray.h index 41e38953cf1..85df5550d4e 100644 --- a/dbms/src/Columns/ColumnArray.h +++ b/dbms/src/Columns/ColumnArray.h @@ -81,15 +81,15 @@ public: bool hasEqualOffsets(const ColumnArray & other) const; /** More efficient methods of manipulation */ - IColumn & getData() { return data->assumeMutableRef(); } + IColumn & getData() { return *data; } const IColumn & getData() const { return *data; } - IColumn & getOffsetsColumn() { return offsets->assumeMutableRef(); } + IColumn & getOffsetsColumn() { return *offsets; } const IColumn & getOffsetsColumn() const { return *offsets; } Offsets & ALWAYS_INLINE getOffsets() { - return static_cast(offsets->assumeMutableRef()).getData(); + return static_cast(*offsets).getData(); } const Offsets & ALWAYS_INLINE getOffsets() const @@ -124,8 +124,8 @@ public: } private: - ColumnPtr data; - ColumnPtr offsets; + WrappedPtr data; + WrappedPtr offsets; size_t ALWAYS_INLINE offsetAt(ssize_t i) const { return getOffsets()[i - 1]; } size_t ALWAYS_INLINE sizeAt(ssize_t i) const { return getOffsets()[i] - getOffsets()[i - 1]; } diff --git a/dbms/src/Columns/ColumnConst.h b/dbms/src/Columns/ColumnConst.h index 53ac5b24220..87371895840 100644 --- a/dbms/src/Columns/ColumnConst.h +++ b/dbms/src/Columns/ColumnConst.h @@ -23,7 +23,7 @@ class ColumnConst final : public COWPtrHelper private: friend class COWPtrHelper; - ColumnPtr 
data; + WrappedPtr data; size_t s; ColumnConst(const ColumnPtr & data, size_t s); @@ -141,9 +141,8 @@ public: const char * deserializeAndInsertFromArena(const char * pos) override { - auto & mutable_data = data->assumeMutableRef(); - auto res = mutable_data.deserializeAndInsertFromArena(pos); - mutable_data.popBack(1); + auto res = data->deserializeAndInsertFromArena(pos); + data->popBack(1); ++s; return res; } @@ -208,11 +207,9 @@ public: /// Not part of the common interface. - IColumn & getDataColumn() { return data->assumeMutableRef(); } + IColumn & getDataColumn() { return *data; } const IColumn & getDataColumn() const { return *data; } - //MutableColumnPtr getDataColumnMutablePtr() { return data; } const ColumnPtr & getDataColumnPtr() const { return data; } - //ColumnPtr & getDataColumnPtr() { return data; } Field getField() const { return getDataColumn()[0]; } diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index c9a475fd8a6..55d98f9ce22 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -522,7 +522,7 @@ void ColumnLowCardinality::Index::insertPosition(UInt64 position) while (position > getMaxPositionForCurrentType()) expandType(); - positions->assumeMutableRef().insert(position); + positions->insert(position); checkSizeOfType(); } @@ -540,7 +540,7 @@ void ColumnLowCardinality::Index::insertPositionsRange(const IColumn & column, U convertPositions(); if (size_of_type == sizeof(ColumnType)) - positions->assumeMutableRef().insertRangeFrom(column, offset, limit); + positions->insertRangeFrom(column, offset, limit); else { auto copy = [&](auto cur_type) diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index 3b816b2449c..f93e7cb80e7 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -149,10 +149,10 @@ public: const IColumnUnique & getDictionary() const { return 
dictionary.getColumnUnique(); } const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); } - /// IColumnUnique & getUnique() { return static_cast(*column_unique->assumeMutable()); } + /// IColumnUnique & getUnique() { return static_cast(*column_unique); } /// ColumnPtr getUniquePtr() const { return column_unique; } - /// IColumn & getIndexes() { return idx.getPositions()->assumeMutableRef(); } + /// IColumn & getIndexes() { return *idx.getPositions(); } const IColumn & getIndexes() const { return *idx.getPositions(); } const ColumnPtr & getIndexesPtr() const { return idx.getPositions(); } size_t getSizeOfIndexType() const { return idx.getSizeOfIndexType(); } @@ -202,13 +202,13 @@ public: explicit Index(ColumnPtr positions); const ColumnPtr & getPositions() const { return positions; } - ColumnPtr & getPositionsPtr() { return positions; } + WrappedPtr & getPositionsPtr() { return positions; } size_t getPositionAt(size_t row) const; void insertPosition(UInt64 position); void insertPositionsRange(const IColumn & column, UInt64 offset, UInt64 limit); - void popBack(size_t n) { positions->assumeMutableRef().popBack(n); } - void reserve(size_t n) { positions->assumeMutableRef().reserve(n); } + void popBack(size_t n) { positions->popBack(n); } + void reserve(size_t n) { positions->reserve(n); } UInt64 getMaxPositionForCurrentType() const; @@ -224,7 +224,7 @@ public: void countKeys(ColumnUInt64::Container & counts) const; private: - ColumnPtr positions; + WrappedPtr positions; size_t size_of_type = 0; void updateSizeOfType() { size_of_type = getSizeOfIndexType(*positions, size_of_type); } @@ -252,10 +252,10 @@ private: explicit Dictionary(ColumnPtr column_unique, bool is_shared); const ColumnPtr & getColumnUniquePtr() const { return column_unique; } - ColumnPtr & getColumnUniquePtr() { return column_unique; } + WrappedPtr & getColumnUniquePtr() { return column_unique; } const IColumnUnique & getColumnUnique() const { return 
static_cast(*column_unique); } - IColumnUnique & getColumnUnique() { return static_cast(column_unique->assumeMutableRef()); } + IColumnUnique & getColumnUnique() { return static_cast(*column_unique); } /// Dictionary may be shared for several mutable columns. /// Immutable columns may have the same column unique, which isn't necessarily shared dictionary. @@ -266,7 +266,7 @@ private: void compact(ColumnPtr & positions); private: - ColumnPtr column_unique; + WrappedPtr column_unique; bool shared = false; void checkColumn(const IColumn & column); diff --git a/dbms/src/Columns/ColumnNullable.h b/dbms/src/Columns/ColumnNullable.h index 8a3651b378b..a66979c0683 100644 --- a/dbms/src/Columns/ColumnNullable.h +++ b/dbms/src/Columns/ColumnNullable.h @@ -106,16 +106,15 @@ public: /// Return the column that represents values. - IColumn & getNestedColumn() { return nested_column->assumeMutableRef(); } + IColumn & getNestedColumn() { return *nested_column; } const IColumn & getNestedColumn() const { return *nested_column; } const ColumnPtr & getNestedColumnPtr() const { return nested_column; } /// Return the column that represents the byte map. 
- //ColumnPtr & getNullMapColumnPtr() { return null_map; } const ColumnPtr & getNullMapColumnPtr() const { return null_map; } - ColumnUInt8 & getNullMapColumn() { return static_cast(null_map->assumeMutableRef()); } + ColumnUInt8 & getNullMapColumn() { return static_cast(*null_map); } const ColumnUInt8 & getNullMapColumn() const { return static_cast(*null_map); } NullMap & getNullMapData() { return getNullMapColumn().getData(); } @@ -134,8 +133,8 @@ public: void checkConsistency() const; private: - ColumnPtr nested_column; - ColumnPtr null_map; + WrappedPtr nested_column; + WrappedPtr null_map; template void applyNullMapImpl(const ColumnUInt8 & map); diff --git a/dbms/src/Columns/ColumnTuple.cpp b/dbms/src/Columns/ColumnTuple.cpp index caed6c5d6f1..a69ff147394 100644 --- a/dbms/src/Columns/ColumnTuple.cpp +++ b/dbms/src/Columns/ColumnTuple.cpp @@ -53,7 +53,7 @@ ColumnTuple::Ptr ColumnTuple::create(const Columns & columns) throw Exception{"ColumnTuple cannot have ColumnConst as its element", ErrorCodes::ILLEGAL_COLUMN}; auto column_tuple = ColumnTuple::create(MutableColumns()); - column_tuple->columns = columns; + column_tuple->columns.assign(columns.begin(), columns.end()); return column_tuple; } @@ -101,7 +101,7 @@ void ColumnTuple::insert(const Field & x) throw Exception("Cannot insert value of different size into tuple", ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); for (size_t i = 0; i < tuple_size; ++i) - columns[i]->assumeMutableRef().insert(tuple[i]); + columns[i]->insert(tuple[i]); } void ColumnTuple::insertFrom(const IColumn & src_, size_t n) @@ -113,19 +113,19 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n) throw Exception("Cannot insert value of different size into tuple", ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE); for (size_t i = 0; i < tuple_size; ++i) - columns[i]->assumeMutableRef().insertFrom(*src.columns[i], n); + columns[i]->insertFrom(*src.columns[i], n); } void ColumnTuple::insertDefault() { 
for (auto & column : columns) - column->assumeMutableRef().insertDefault(); + column->insertDefault(); } void ColumnTuple::popBack(size_t n) { for (auto & column : columns) - column->assumeMutableRef().popBack(n); + column->popBack(n); } StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const @@ -140,7 +140,7 @@ StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char con const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos) { for (auto & column : columns) - pos = column->assumeMutableRef().deserializeAndInsertFromArena(pos); + pos = column->deserializeAndInsertFromArena(pos); return pos; } @@ -155,7 +155,7 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) - columns[i]->assumeMutableRef().insertRangeFrom( + columns[i]->insertRangeFrom( *static_cast(src).columns[i], start, length); } @@ -238,21 +238,19 @@ int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_dire template struct ColumnTuple::Less { - ColumnRawPtrs plain_columns; + TupleColumns columns; int nan_direction_hint; - Less(const Columns & columns, int nan_direction_hint_) - : nan_direction_hint(nan_direction_hint_) + Less(const TupleColumns & columns, int nan_direction_hint_) + : columns(columns), nan_direction_hint(nan_direction_hint_) { - for (const auto & column : columns) - plain_columns.push_back(column.get()); } bool operator() (size_t a, size_t b) const { - for (ColumnRawPtrs::const_iterator it = plain_columns.begin(); it != plain_columns.end(); ++it) + for (const auto & column : columns) { - int res = (*it)->compareAt(a, b, **it, nan_direction_hint); + int res = column->compareAt(a, b, *column, nan_direction_hint); if (res < 0) return positive; else if (res > 0) @@ -319,7 +317,7 @@ size_t ColumnTuple::allocatedBytes() const void ColumnTuple::protect() { for (auto & column : 
columns) - column->assumeMutableRef().protect(); + column->protect(); } void ColumnTuple::getExtremes(Field & min, Field & max) const diff --git a/dbms/src/Columns/ColumnTuple.h b/dbms/src/Columns/ColumnTuple.h index d0a65a03d37..376c099c1dc 100644 --- a/dbms/src/Columns/ColumnTuple.h +++ b/dbms/src/Columns/ColumnTuple.h @@ -17,7 +17,8 @@ class ColumnTuple final : public COWPtrHelper private: friend class COWPtrHelper; - Columns columns; + using TupleColumns = std::vector; + TupleColumns columns; template struct Less; @@ -31,6 +32,7 @@ public: */ using Base = COWPtrHelper; static Ptr create(const Columns & columns); + static Ptr create(const TupleColumns & columns); static Ptr create(Columns && arg) { return create(arg); } template ::value>::type> @@ -78,9 +80,10 @@ public: size_t tupleSize() const { return columns.size(); } const IColumn & getColumn(size_t idx) const { return *columns[idx]; } - IColumn & getColumn(size_t idx) { return columns[idx]->assumeMutableRef(); } + IColumn & getColumn(size_t idx) { return *columns[idx]; } - const Columns & getColumns() const { return columns; } + const TupleColumns & getColumns() const { return columns; } + Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; } const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; } }; diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index a06863858ae..5882f9109b2 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -80,7 +80,7 @@ public: bool isNumeric() const override { return column_holder->isNumeric(); } size_t byteSize() const override { return column_holder->byteSize(); } - void protect() override { column_holder->assumeMutableRef().protect(); } + void protect() override { column_holder->protect(); } size_t allocatedBytes() const override { return column_holder->allocatedBytes() @@ -108,14 +108,14 @@ public: private: - ColumnPtr column_holder; + IColumn::WrappedPtr column_holder; bool 
is_nullable; size_t size_of_value_if_fixed = 0; ReverseIndex index; /// For DataTypeNullable, stores null map. - ColumnPtr nested_null_mask; - ColumnPtr nested_column_nullable; + IColumn::WrappedPtr nested_null_mask; + IColumn::WrappedPtr nested_column_nullable; class IncrementalHash { @@ -138,7 +138,7 @@ private: static size_t numSpecialValues(bool is_nullable) { return is_nullable ? 2 : 1; } size_t numSpecialValues() const { return numSpecialValues(is_nullable); } - ColumnType * getRawColumnPtr() { return static_cast(column_holder->assumeMutable().get()); } + ColumnType * getRawColumnPtr() { return static_cast(column_holder.get()); } const ColumnType * getRawColumnPtr() const { return static_cast(column_holder.get()); } template @@ -230,10 +230,7 @@ void ColumnUnique::updateNullMask() size_t size = getRawColumnPtr()->size(); if (nested_null_mask->size() != size) - { - IColumn & null_mask = nested_null_mask->assumeMutableRef(); - static_cast(null_mask).getData().resize_fill(size); - } + static_cast(*nested_null_mask).getData().resize_fill(size); } } diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index bd5b5aa0e14..9ed79c0b69c 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -259,7 +259,7 @@ public: /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them. /// Shallow: doesn't do recursive calls; don't do call for itself. - using ColumnCallback = std::function; + using ColumnCallback = std::function; virtual void forEachSubcolumn(ColumnCallback) {} /// Columns have equal structure. 
@@ -273,7 +273,7 @@ public: MutablePtr mutate() const && { MutablePtr res = shallowMutate(); - res->forEachSubcolumn([](Ptr & subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); + res->forEachSubcolumn([](WrappedPtr & subcolumn) { subcolumn = std::move(*subcolumn).mutate(); }); return res; } diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index bc10757cea4..6a3394e1685 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -224,7 +224,10 @@ protected: const T & operator*() const { return *value; } T & operator*() { return value->assumeMutableRef(); } - operator bool() const { return value; } + operator const immutable_ptr & () const { return value; } + operator immutable_ptr & () { return value; } + + operator bool() const { return value != nullptr; } }; public: diff --git a/dbms/src/DataStreams/ColumnGathererStream.cpp b/dbms/src/DataStreams/ColumnGathererStream.cpp index 22349044529..53466599617 100644 --- a/dbms/src/DataStreams/ColumnGathererStream.cpp +++ b/dbms/src/DataStreams/ColumnGathererStream.cpp @@ -60,12 +60,9 @@ Block ColumnGathererStream::readImpl() if (!source_to_fully_copy && row_sources_buf.eof()) return Block(); - output_block = Block{column.cloneEmpty()}; - MutableColumnPtr output_column = output_block.getByPosition(0).column->assumeMutable(); + MutableColumnPtr output_column = column.column->cloneEmpty(); output_column->gather(*this); - if (!output_column->empty()) - output_block.getByPosition(0).column = std::move(output_column); - + output_block = Block{{std::move(output_column), column.type, column.name}}; return output_block; } diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp index 0812e968794..583ebc3af9e 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -69,7 +69,7 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) if (const auto * 
column_tuple = typeid_cast(column.get())) { - Columns columns = column_tuple->getColumns(); + auto columns = column_tuple->getColumns(); for (auto & element : columns) element = recursiveRemoveLowCardinality(element); return ColumnTuple::create(columns); @@ -142,7 +142,7 @@ ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const Data throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), ErrorCodes::ILLEGAL_COLUMN); - Columns columns = column_tuple->getColumns(); + auto columns = column_tuple->getColumns(); auto & from_elements = from_tuple_type->getElements(); auto & to_elements = to_tuple_type->getElements(); diff --git a/dbms/src/DataTypes/NestedUtils.cpp b/dbms/src/DataTypes/NestedUtils.cpp index 195fd722450..ce421079bbf 100644 --- a/dbms/src/DataTypes/NestedUtils.cpp +++ b/dbms/src/DataTypes/NestedUtils.cpp @@ -101,7 +101,7 @@ Block flatten(const Block & block) const ColumnPtr & column_offsets = column_array->getOffsetsPtr(); const ColumnTuple & column_tuple = typeid_cast(column_array->getData()); - const Columns & element_columns = column_tuple.getColumns(); + const auto & element_columns = column_tuple.getColumns(); for (size_t i = 0; i < tuple_size; ++i) { diff --git a/dbms/src/Functions/FunctionHelpers.cpp b/dbms/src/Functions/FunctionHelpers.cpp index 43a0f73cdfc..1a05d3c2bfe 100644 --- a/dbms/src/Functions/FunctionHelpers.cpp +++ b/dbms/src/Functions/FunctionHelpers.cpp @@ -33,7 +33,7 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co Columns convertConstTupleToConstantElements(const ColumnConst & column) { const ColumnTuple & src_tuple = static_cast(column.getDataColumn()); - const Columns & src_tuple_columns = src_tuple.getColumns(); + const auto & src_tuple_columns = src_tuple.getColumns(); size_t tuple_size = src_tuple_columns.size(); size_t rows = column.size(); diff --git a/dbms/src/Functions/FunctionsComparison.h b/dbms/src/Functions/FunctionsComparison.h 
index 13db04ed4ba..d399ef8ff48 100644 --- a/dbms/src/Functions/FunctionsComparison.h +++ b/dbms/src/Functions/FunctionsComparison.h @@ -932,12 +932,12 @@ private: if (x_const) x_columns = convertConstTupleToConstantElements(*x_const); else - x_columns = static_cast(*c0.column).getColumns(); + x_columns = static_cast(*c0.column).getColumnsCopy(); if (y_const) y_columns = convertConstTupleToConstantElements(*y_const); else - y_columns = static_cast(*c1.column).getColumns(); + y_columns = static_cast(*c1.column).getColumnsCopy(); for (size_t i = 0; i < tuple_size; ++i) { diff --git a/dbms/src/Functions/FunctionsExternalDictionaries.h b/dbms/src/Functions/FunctionsExternalDictionaries.h index e9e0f94bb40..12fa4c1293d 100644 --- a/dbms/src/Functions/FunctionsExternalDictionaries.h +++ b/dbms/src/Functions/FunctionsExternalDictionaries.h @@ -167,7 +167,7 @@ private: if (checkColumn(key_col.get())) { - const auto & key_columns = static_cast(*key_col).getColumns(); + const auto & key_columns = static_cast(*key_col).getColumnsCopy(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); auto out = ColumnUInt8::create(key_col_with_type.column->size()); @@ -353,7 +353,7 @@ private: if (checkColumn(key_col.get())) { - const auto & key_columns = static_cast(*key_col).getColumns(); + const auto & key_columns = static_cast(*key_col).getColumnsCopy(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); auto out = ColumnString::create(); @@ -580,7 +580,7 @@ private: /// Functions in external dictionaries only support full-value (not constant) columns with keys. 
ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); - const auto & key_columns = typeid_cast(*key_col).getColumns(); + const auto & key_columns = typeid_cast(*key_col).getColumnsCopy(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); auto out = ColumnString::create(); @@ -815,7 +815,7 @@ private: if (checkColumn(key_col.get())) { - const auto & key_columns = static_cast(*key_col).getColumns(); + const auto & key_columns = static_cast(*key_col).getColumnsCopy(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); auto out = ColumnVector::create(key_columns.front()->size()); @@ -1077,7 +1077,7 @@ private: /// Functions in external dictionaries only support full-value (not constant) columns with keys. ColumnPtr key_col = key_col_with_type.column->convertToFullColumnIfConst(); - const auto & key_columns = typeid_cast(*key_col).getColumns(); + const auto & key_columns = typeid_cast(*key_col).getColumnsCopy(); const auto & key_types = static_cast(*key_col_with_type.type).getElements(); /// @todo detect when all key columns are constant diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index 8e63b7387d2..0d13914f103 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -159,7 +159,7 @@ public: ErrorCodes::ILLEGAL_COLUMN); } - const Columns & tuple_columns = tuple_col->getColumns(); + const auto & tuple_columns = tuple_col->getColumns(); const DataTypes & tuple_types = typeid_cast(*block.getByPosition(arguments[0]).type).getElements(); bool use_float64 = WhichDataType(tuple_types[0]).isFloat64() || WhichDataType(tuple_types[1]).isFloat64(); diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 6bd6213e110..3b41e9ab914 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -818,7 +818,7 @@ private: /// Flattening of tuples. 
if (const ColumnTuple * tuple = typeid_cast(column)) { - const Columns & tuple_columns = tuple->getColumns(); + const auto & tuple_columns = tuple->getColumns(); const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) @@ -826,7 +826,7 @@ private: } else if (const ColumnTuple * tuple_const = checkAndGetColumnConstData(column)) { - const Columns & tuple_columns = tuple_const->getColumns(); + const auto & tuple_columns = tuple_const->getColumns(); const DataTypes & tuple_types = typeid_cast(*type).getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) diff --git a/dbms/src/Functions/GatherUtils/createArraySink.cpp b/dbms/src/Functions/GatherUtils/createArraySink.cpp index e4097195e4c..c7e07dbb18e 100644 --- a/dbms/src/Functions/GatherUtils/createArraySink.cpp +++ b/dbms/src/Functions/GatherUtils/createArraySink.cpp @@ -41,8 +41,7 @@ std::unique_ptr createArraySink(ColumnArray & col, size_t column_siz using Creator = ApplyTypeListForClass::Type; if (auto column_nullable = typeid_cast(&col.getData())) { - auto column = ColumnArray::create(column_nullable->getNestedColumnPtr()->assumeMutable(), - col.getOffsetsPtr()->assumeMutable()); + auto column = ColumnArray::create(column_nullable->getNestedColumnPtr()->assumeMutable(), col.getOffsetsPtr()->assumeMutable()); return Creator::create(*column, &column_nullable->getNullMapData(), column_size); } return Creator::create(col, nullptr, column_size); diff --git a/dbms/src/Functions/arrayElement.cpp b/dbms/src/Functions/arrayElement.cpp index a934a7be84e..a4fce864d54 100644 --- a/dbms/src/Functions/arrayElement.cpp +++ b/dbms/src/Functions/arrayElement.cpp @@ -678,7 +678,7 @@ bool FunctionArrayElement::executeTuple(Block & block, const ColumnNumbers & arg if (!col_nested) return false; - const Columns & tuple_columns = col_nested->getColumns(); + const auto & tuple_columns = 
col_nested->getColumns(); size_t tuple_size = tuple_columns.size(); const DataTypes & tuple_types = typeid_cast( diff --git a/dbms/src/Functions/arrayIntersect.cpp b/dbms/src/Functions/arrayIntersect.cpp index 86e93ef5ec2..c6cb02e6caf 100644 --- a/dbms/src/Functions/arrayIntersect.cpp +++ b/dbms/src/Functions/arrayIntersect.cpp @@ -156,7 +156,7 @@ ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, c throw Exception{"Cannot cast tuple column to type " + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR}; - auto columns_number = column_tuple->getColumns().size(); + auto columns_number = column_tuple->tupleSize(); Columns columns(columns_number); const auto & types = tuple_type->getElements(); diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index c3795c93394..7fac62e72a1 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -545,14 +545,14 @@ private: Columns col2_contents; if (const ColumnTuple * tuple1 = typeid_cast(arg1.column.get())) - col1_contents = tuple1->getColumns(); + col1_contents = tuple1->getColumnsCopy(); else if (const ColumnConst * const_tuple = checkAndGetColumnConst(arg1.column.get())) col1_contents = convertConstTupleToConstantElements(*const_tuple); else return false; if (const ColumnTuple * tuple2 = typeid_cast(arg2.column.get())) - col2_contents = tuple2->getColumns(); + col2_contents = tuple2->getColumnsCopy(); else if (const ColumnConst * const_tuple = checkAndGetColumnConst(arg2.column.get())) col2_contents = convertConstTupleToConstantElements(*const_tuple); else diff --git a/dbms/src/Functions/in.cpp b/dbms/src/Functions/in.cpp index 9a984da1bac..9267a42082c 100644 --- a/dbms/src/Functions/in.cpp +++ b/dbms/src/Functions/in.cpp @@ -106,7 +106,7 @@ public: auto set_types = set->getDataTypes(); if (tuple && (set_types.size() != 1 || !set_types[0]->equals(*type_tuple))) { - const Columns & tuple_columns = tuple->getColumns(); + const auto & 
tuple_columns = tuple->getColumns(); const DataTypes & tuple_types = type_tuple->getElements(); size_t tuple_size = tuple_columns.size(); for (size_t i = 0; i < tuple_size; ++i) diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index cfa2a9a2933..aac0f2bf926 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -295,7 +295,7 @@ private: if (column_with_null[i]) { if (key_pos == i) - res.getByPosition(i).column = makeNullable(std::move(columns[i]))->assumeMutable(); + res.getByPosition(i).column = makeNullable(std::move(columns[i])); else { const ColumnNullable & nullable_col = static_cast(*columns[i]); From 7a02d320fff7e76422fe184af797324cf4361b4b Mon Sep 17 00:00:00 2001 From: Vladislav Smirnov Date: Mon, 25 Mar 2019 20:17:58 +0300 Subject: [PATCH 26/78] rewrite capnproto reading --- dbms/src/Formats/CapnProtoRowInputStream.cpp | 84 +++++++------------- dbms/src/Formats/CapnProtoRowInputStream.h | 2 + 2 files changed, 32 insertions(+), 54 deletions(-) diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp index d1c926b122d..23a1c899838 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.cpp +++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp @@ -206,53 +206,42 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block createActions(list, root); } +kj::Array CapnProtoRowInputStream::readMessage() +{ + uint32_t segment_count; + istr.readStrict(reinterpret_cast(&segment_count), sizeof(uint32_t)); + + // one for segmentCount and one because segmentCount starts from 0 + const auto prefix_size = (2 + segment_count) * sizeof(uint32_t); + const auto words_prefix_size = (segment_count + 1) / 2 + 1; + auto prefix = kj::heapArray(words_prefix_size); + auto prefix_chars = prefix.asChars(); + ::memcpy(prefix_chars.begin(), &segment_count, sizeof(uint32_t)); + + // read size of each segment + for (size_t i = 0; i <= segment_count; ++i) 
+ istr.readStrict(prefix_chars.begin() + ((i + 1) * sizeof(uint32_t)), sizeof(uint32_t)); + + // calculate size of message + const auto expected_words = capnp::expectedSizeInWordsFromPrefix(prefix); + const auto expected_bytes = expected_words * sizeof(capnp::word); + const auto data_size = expected_bytes - prefix_size; + auto msg = kj::heapArray(expected_words); + auto msg_chars = msg.asChars(); + + // read full message + ::memcpy(msg_chars.begin(), prefix_chars.begin(), prefix_size); + istr.readStrict(msg_chars.begin() + prefix_size, data_size); + + return msg; +} bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) { if (istr.eof()) return false; - // Read from underlying buffer directly - auto& buf = istr.buffer(); - auto base = reinterpret_cast(istr.position()); - - // Check if there's enough bytes in the buffer to read the full message - kj::Array heap_array; - kj::ArrayPtr array; - - size_t bytes_size = buf.size() - istr.offset(); - size_t remaining_bytes = 0; - if (bytes_size < sizeof(capnp::word)) // case when we read less than 8 bytes (capnp::word) - { - char edge_bytes[sizeof(capnp::word)]; - while (bytes_size + remaining_bytes < sizeof(capnp::word)) - { - istr.readStrict(edge_bytes + remaining_bytes, bytes_size); - remaining_bytes += bytes_size; - istr.next(); - bytes_size = buf.size(); - } - - auto words_size = bytes_size / sizeof(capnp::word) + 1; - heap_array = kj::heapArray(words_size + 1); - auto chars_heap_array = heap_array.asChars(); - ::memcpy(chars_heap_array.begin(), edge_bytes, remaining_bytes); - ::memcpy(chars_heap_array.begin() + remaining_bytes, buf.begin(), buf.size()); - array = heap_array.asPtr(); - } - else - { - auto words_size = bytes_size / sizeof(capnp::word); - array = kj::arrayPtr(base, words_size); - auto expected_words = capnp::expectedSizeInWordsFromPrefix(array); - if (expected_words * sizeof(capnp::word) > array.size()) - { - // We'll need to reassemble the message in a contiguous buffer - 
heap_array = kj::heapArray(expected_words); - istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size()); - array = heap_array.asPtr(); - } - } + auto array = readMessage(); #if CAPNP_VERSION >= 8000 capnp::UnalignedFlatArrayMessageReader msg(array); @@ -306,19 +295,6 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &) } } - // Advance buffer position if used directly - if (heap_array.size() == 0) - { - auto parsed = (msg.getEnd() - base) * sizeof(capnp::word); - istr.ignore(parsed); - } - // Advance buffer position if used with remaining bytes from previous buffer - else if (remaining_bytes != 0) - { - auto parsed = (msg.getEnd() - heap_array.begin()) * sizeof(capnp::word) - remaining_bytes; - istr.ignore(parsed); - } - return true; } diff --git a/dbms/src/Formats/CapnProtoRowInputStream.h b/dbms/src/Formats/CapnProtoRowInputStream.h index c40827522aa..02548b5a0fc 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.h +++ b/dbms/src/Formats/CapnProtoRowInputStream.h @@ -38,6 +38,8 @@ public: bool read(MutableColumns & columns, RowReadExtension &) override; private: + kj::Array readMessage(); + // Build a traversal plan from a sorted list of fields void createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader); From 67b6a8966bfbdcac1f8a52c80c3543c363439d61 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 25 Mar 2019 20:56:18 +0300 Subject: [PATCH 27/78] Attempt to make compositions with COWPtr more convenient --- dbms/src/AggregateFunctions/UniqVariadicHash.h | 12 ++++++------ dbms/src/Columns/ColumnTuple.cpp | 12 ++++++++++++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/dbms/src/AggregateFunctions/UniqVariadicHash.h b/dbms/src/AggregateFunctions/UniqVariadicHash.h index c99afd0801a..eb38768f776 100644 --- a/dbms/src/AggregateFunctions/UniqVariadicHash.h +++ b/dbms/src/AggregateFunctions/UniqVariadicHash.h @@ -67,10 +67,10 @@ struct UniqVariadicHash { UInt64 hash; - 
const Columns & tuple_columns = static_cast(columns[0])->getColumns(); + const auto & tuple_columns = static_cast(columns[0])->getColumns(); - const ColumnPtr * column = tuple_columns.data(); - const ColumnPtr * columns_end = column + num_args; + const auto * column = tuple_columns.data(); + const auto * columns_end = column + num_args; { StringRef value = column->get()->getDataAt(row_num); @@ -116,10 +116,10 @@ struct UniqVariadicHash { static inline UInt128 apply(size_t num_args, const IColumn ** columns, size_t row_num) { - const Columns & tuple_columns = static_cast(columns[0])->getColumns(); + const auto & tuple_columns = static_cast(columns[0])->getColumns(); - const ColumnPtr * column = tuple_columns.data(); - const ColumnPtr * columns_end = column + num_args; + const auto * column = tuple_columns.data(); + const auto * columns_end = column + num_args; SipHash hash; diff --git a/dbms/src/Columns/ColumnTuple.cpp b/dbms/src/Columns/ColumnTuple.cpp index a69ff147394..c5a14f497dd 100644 --- a/dbms/src/Columns/ColumnTuple.cpp +++ b/dbms/src/Columns/ColumnTuple.cpp @@ -58,6 +58,18 @@ ColumnTuple::Ptr ColumnTuple::create(const Columns & columns) return column_tuple; } +ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns) +{ + for (const auto & column : columns) + if (column->isColumnConst()) + throw Exception{"ColumnTuple cannot have ColumnConst as its element", ErrorCodes::ILLEGAL_COLUMN}; + + auto column_tuple = ColumnTuple::create(MutableColumns()); + column_tuple->columns = columns; + + return column_tuple; +} + MutableColumnPtr ColumnTuple::cloneEmpty() const { const size_t tuple_size = columns.size(); From 14480744aa9237a42063adbc61b8e6d8952ce240 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 26 Mar 2019 00:44:10 +0300 Subject: [PATCH 28/78] Removed excessive and unbalanced whitespaces from formatted CREATE query --- dbms/src/Parsers/ASTColumnDeclaration.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/dbms/src/Parsers/ASTColumnDeclaration.cpp b/dbms/src/Parsers/ASTColumnDeclaration.cpp index bf8cf8e0861..b43271868d1 100644 --- a/dbms/src/Parsers/ASTColumnDeclaration.cpp +++ b/dbms/src/Parsers/ASTColumnDeclaration.cpp @@ -39,10 +39,12 @@ ASTPtr ASTColumnDeclaration::clone() const void ASTColumnDeclaration::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { frame.need_parens = false; - std::string indent_str = settings.one_line ? "" : std::string(4 * frame.indent, ' '); + + if (!settings.one_line) + settings.ostr << settings.nl_or_ws << std::string(4 * frame.indent, ' '); /// We have to always backquote column names to avoid ambiguouty with INDEX and other declarations in CREATE query. - settings.ostr << settings.nl_or_ws << indent_str << backQuote(name); + settings.ostr << backQuote(name); if (type) { From 9d232778fc1e9c829f57a91cd8168ece88fbf4f3 Mon Sep 17 00:00:00 2001 From: tai Date: Mon, 25 Mar 2019 16:55:26 +0800 Subject: [PATCH 29/78] Fix crash when distributed modify order by Did not determine if order_by_ast is empty. If it is null, it will clone a null pointer and cause server crash. 
--- dbms/src/Storages/AlterCommands.cpp | 2 +- .../00910_crash_when_distributed_modify_order_by.reference | 0 .../00910_crash_when_distributed_modify_order_by.sql | 5 +++++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.reference create mode 100644 dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index d3790aa3a19..106b1250ab3 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -315,7 +315,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri } else if (type == MODIFY_ORDER_BY) { - if (!primary_key_ast) + if (!primary_key_ast && order_by_ast) { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. diff --git a/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.reference b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql new file mode 100644 index 00000000000..003172018e1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql @@ -0,0 +1,5 @@ +DROP TABLE IF EXISTS test.union1; +DROP TABLE IF EXISTS test.union2; +CREATE TABLE test.union1 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = MergeTree(date, (a, date), 8192); +CREATE TABLE test.union2 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost, 'test', 'union1'); +ALTER TABLE test.union2 MODIFY ORDER BY a; -- { serverError 48 } From 56bdbae79e68ca9408e3ad217b6c768d25e3ae18 
Mon Sep 17 00:00:00 2001 From: Simon Podlipsky Date: Tue, 26 Mar 2019 10:40:14 +0100 Subject: [PATCH 30/78] Upgrade librdkafka to v1.0.0 --- contrib/librdkafka | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/librdkafka b/contrib/librdkafka index 73295a702cd..8695b9d63ac 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit 73295a702cd1c85c11749ade500d713db7099cca +Subproject commit 8695b9d63ac0fe1b891b511d5b36302ffc84d4e2 From 735ae294dd3d962b5a3481e704e35862940a28df Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Tue, 26 Mar 2019 15:44:25 +0300 Subject: [PATCH 31/78] Add NOT BETWEEN operator --- docs/en/query_language/operators.md | 2 ++ docs/ru/query_language/operators.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md index 61e008c2ba8..602668bf30d 100644 --- a/docs/en/query_language/operators.md +++ b/docs/en/query_language/operators.md @@ -51,6 +51,8 @@ Groups of operators are listed in order of priority (the higher it is in the lis `a BETWEEN b AND c` – The same as `a >= b AND a <= c.` +`a NOT BETWEEN b AND c` – The same as `a < b OR a > c.` + ## Operators for Working With Data Sets *See the section [IN operators](select.md#select-in-operators).* diff --git a/docs/ru/query_language/operators.md b/docs/ru/query_language/operators.md index edfc2513d28..89c6dd4c176 100644 --- a/docs/ru/query_language/operators.md +++ b/docs/ru/query_language/operators.md @@ -51,6 +51,8 @@ `a BETWEEN b AND c` - равнозначно `a >= b AND a <= c` +`a NOT BETWEEN b AND c` - равнозначно `a < b OR a > c` + ## Операторы для работы с множествами *Смотрите раздел [Операторы IN](select.md#select-in-operators).* From e49247a22c77d4aefeb0c9fcd37959bda9e47181 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 26 Mar 2019 16:30:12 +0300 Subject: [PATCH 32/78] Website Dockerfile fix (#4809) --- website/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/website/Dockerfile b/website/Dockerfile index 64eb0ce5e33..ee4f9ffccdc 100644 --- a/website/Dockerfile +++ b/website/Dockerfile @@ -1,4 +1,4 @@ FROM nginx:mainline -COPY . /usr/share/nginx/html +COPY . /usr/share/nginx/html/public COPY nginx/nginx.conf /etc/nginx/nginx.conf COPY nginx/default.conf /etc/nginx/conf.d/default.conf From 9bc4632f2066301a19ad9c55ee841e98b818380d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 26 Mar 2019 17:36:56 +0300 Subject: [PATCH 33/78] Avoid progress bar flicker --- dbms/programs/client/Client.cpp | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 24336822f88..1568f1cc544 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -1314,6 +1315,9 @@ private: /// Received data block is immediately displayed to the user. block_out_stream->flush(); + + /// Restore progress bar after data block. + writeProgress(); } @@ -1353,8 +1357,8 @@ private: void clearProgress() { - std::cerr << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE; written_progress_chars = 0; + std::cerr << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE; } @@ -1363,6 +1367,9 @@ private: if (!need_render_progress) return; + /// Output all progress bar commands to stderr at once to avoid flicker. 
+ WriteBufferFromFileDescriptor message(STDERR_FILENO, 1024); + static size_t increment = 0; static const char * indicators[8] = { @@ -1377,13 +1384,15 @@ private: }; if (written_progress_chars) - clearProgress(); + message << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE; else - std::cerr << SAVE_CURSOR_POSITION; + message << SAVE_CURSOR_POSITION; + + message << DISABLE_LINE_WRAPPING; + + size_t prefix_size = message.count(); - std::stringstream message; message << indicators[increment % 8] - << std::fixed << std::setprecision(3) << " Progress: "; message @@ -1398,8 +1407,7 @@ private: else message << ". "; - written_progress_chars = message.str().size() - (increment % 8 == 7 ? 10 : 13); - std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf(); + written_progress_chars = message.count() - prefix_size - (increment % 8 == 7 ? 10 : 13); /// Don't count invisible output (escape sequences). /// If the approximate number of rows to process is known, we can display a progress bar and percentage. if (progress.total_rows > 0) @@ -1421,19 +1429,21 @@ private: if (width_of_progress_bar > 0) { std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar)); - std::cerr << "\033[0;32m" << bar << "\033[0m"; + message << "\033[0;32m" << bar << "\033[0m"; if (width_of_progress_bar > static_cast(bar.size() / UNICODE_BAR_CHAR_SIZE)) - std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); + message << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' '); } } } /// Underestimate percentage a bit to avoid displaying 100%. 
- std::cerr << ' ' << (99 * progress.rows / total_rows_corrected) << '%'; + message << ' ' << (99 * progress.rows / total_rows_corrected) << '%'; } - std::cerr << ENABLE_LINE_WRAPPING; + message << ENABLE_LINE_WRAPPING; ++increment; + + message.next(); } From 3bf831b34cf93088589c9da8f0f68ec19bd82b61 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 26 Mar 2019 17:59:40 +0300 Subject: [PATCH 34/78] Removed two unused files from debian directory --- debian/daemons | 0 debian/dupload.conf.in | 11 ----------- 2 files changed, 11 deletions(-) delete mode 100644 debian/daemons delete mode 100644 debian/dupload.conf.in diff --git a/debian/daemons b/debian/daemons deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/debian/dupload.conf.in b/debian/dupload.conf.in deleted file mode 100644 index ca1973b3af4..00000000000 --- a/debian/dupload.conf.in +++ /dev/null @@ -1,11 +0,0 @@ -package config; - -$default_host = "metrika"; - -$cfg{'metrika'} = { - fqdn => "", - method => "scpb", - incoming => "/repo/metrika/mini-dinstall/incoming/", - dinstall_runs => 0, - login => "@AUTHOR@" -}; From b747608b7f6980dcf245b8dd7d6045eb555f869d Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Tue, 26 Mar 2019 18:46:50 +0300 Subject: [PATCH 35/78] Add info about NULLs and NaNs in arraySort --- .../functions/higher_order_functions.md | 22 ++++++++++++++++- .../functions/higher_order_functions.md | 24 +++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) diff --git a/docs/en/query_language/functions/higher_order_functions.md b/docs/en/query_language/functions/higher_order_functions.md index dde52c05b7a..ca8612ddab5 100644 --- a/docs/en/query_language/functions/higher_order_functions.md +++ b/docs/en/query_language/functions/higher_order_functions.md @@ -119,11 +119,31 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]); └────────────────────┘ ``` +Note that NULLs and NaNs go last (NaNs go before NULLs). 
For example: + +``` sql +SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL]) +``` +``` +┌─arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐ +│ [1,2,3,4,nan,nan,NULL,NULL] │ +└───────────────────────────────────────────────┘ +``` + ### arrayReverseSort(\[func,\] arr1, ...) Returns an array as result of sorting the elements of `arr1` in descending order. If the `func` function is specified, sorting order is determined by the result of the function `func` applied to the elements of array (arrays) - +Note that NULLs and NaNs go last (NaNs go before NULLs). For example: + +``` sql +SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL]) +``` +``` +┌─arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐ +│ [4,3,2,1,nan,nan,NULL,NULL] │ +└──────────────────────────────────────────────────────┘ +``` diff --git a/docs/ru/query_language/functions/higher_order_functions.md b/docs/ru/query_language/functions/higher_order_functions.md index e26546f11df..f5586cda6ab 100644 --- a/docs/ru/query_language/functions/higher_order_functions.md +++ b/docs/ru/query_language/functions/higher_order_functions.md @@ -61,6 +61,7 @@ SELECT Вернуть первый элемент массива arr1, для которого функция func возвращает не 0. ### arrayFirstIndex(func, arr1, ...) + Вернуть индекс первого элемента массива arr1, для которого функция func возвращает не 0. ### arrayCumSum(\[func,\] arr1, ...) @@ -98,8 +99,31 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]); └────────────────────┘ ``` +`NULL` и `NaN` будут последними в массиве (при этом `NaN` будет перед `NULL`). Например: + +``` sql +SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL]) +``` +``` +┌─arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐ +│ [1,2,3,4,nan,nan,NULL,NULL] │ +└───────────────────────────────────────────────┘ +``` + ### arrayReverseSort(\[func,\] arr1, ...) Возвращает отсортированный в нисходящем порядке массив `arr1`. 
Если задана функция `func`, то порядок сортировки определяется результатом применения функции `func` на элементы массива (массивов). +`NULL` и `NaN` будут последними в массиве (при этом `NaN` будет перед `NULL`). Например: + +``` sql +SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL]) +``` +``` +┌─arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐ +│ [4,3,2,1,nan,nan,NULL,NULL] │ +└──────────────────────────────────────────────────────┘ +``` + + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/higher_order_functions/) From 04e76f40de186d8e61b74ad8b6246d7e208150ed Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 26 Mar 2019 19:37:58 +0300 Subject: [PATCH 36/78] Update 00910_crash_when_distributed_modify_order_by.sql --- .../00910_crash_when_distributed_modify_order_by.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql index 003172018e1..89ea9cbcb28 100644 --- a/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql +++ b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql @@ -3,3 +3,5 @@ DROP TABLE IF EXISTS test.union2; CREATE TABLE test.union1 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = MergeTree(date, (a, date), 8192); CREATE TABLE test.union2 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost, 'test', 'union1'); ALTER TABLE test.union2 MODIFY ORDER BY a; -- { serverError 48 } +DROP TABLE test.union1; +DROP TABLE test.union2; From 9fbb19f073f7a6925d23989f307989034676ee13 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Tue, 26 Mar 2019 19:39:49 +0300 Subject: [PATCH 37/78] LIMIT OFFSET syntax --- docs/en/query_language/select.md | 5 +++-- docs/ru/query_language/select.md | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff 
--git a/docs/en/query_language/select.md b/docs/en/query_language/select.md index 1e4f287bfcd..8b48d931d9e 100644 --- a/docs/en/query_language/select.md +++ b/docs/en/query_language/select.md @@ -761,11 +761,12 @@ DISTINCT is not supported if SELECT has at least one array column. ### LIMIT Clause `LIMIT m` allows you to select the first `m` rows from the result. -`LIMIT n`, m allows you to select the first `m` rows from the result after skipping the first `n` rows. + +`LIMIT n, m` allows you to select the first `m` rows from the result after skipping the first `n` rows. The `LIMIT m OFFSET n` syntax is also supported. `n` and `m` must be non-negative integers. -If there isn't an ORDER BY clause that explicitly sorts results, the result may be arbitrary and nondeterministic. +If there isn't an `ORDER BY` clause that explicitly sorts results, the result may be arbitrary and nondeterministic. ### UNION ALL Clause diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index b9c899f6532..bd103b4cc14 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -714,12 +714,13 @@ WHERE и HAVING отличаются тем, что WHERE выполняется ### Секция LIMIT -LIMIT m позволяет выбрать из результата первые m строк. -LIMIT n, m позволяет выбрать из результата первые m строк после пропуска первых n строк. +`LIMIT m` позволяет выбрать из результата первые `m` строк. + +`LIMIT n, m` позволяет выбрать из результата первые `m` строк после пропуска первых `n` строк. Синтаксис `LIMIT m OFFSET n` также поддерживается. n и m должны быть неотрицательными целыми числами. -При отсутствии секции ORDER BY, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным. +При отсутствии секции `ORDER BY`, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным. 
### Секция UNION ALL From dc015ee5bf2a913b0ef00592b8ef29b2e694c285 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Tue, 26 Mar 2019 19:40:43 +0300 Subject: [PATCH 38/78] LIMIT OFFSET syntax --- docs/ru/query_language/select.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md index bd103b4cc14..153e20bd8df 100644 --- a/docs/ru/query_language/select.md +++ b/docs/ru/query_language/select.md @@ -718,7 +718,7 @@ WHERE и HAVING отличаются тем, что WHERE выполняется `LIMIT n, m` позволяет выбрать из результата первые `m` строк после пропуска первых `n` строк. Синтаксис `LIMIT m OFFSET n` также поддерживается. -n и m должны быть неотрицательными целыми числами. +`n` и `m` должны быть неотрицательными целыми числами. При отсутствии секции `ORDER BY`, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным. From 75f1ba6763c4381aa46e3a1899ab70233c7b96c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 26 Mar 2019 20:23:59 +0300 Subject: [PATCH 39/78] Added bug --- dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql | 1 + 1 file changed, 1 insertion(+) create mode 100644 dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql diff --git a/dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql b/dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql new file mode 100644 index 00000000000..40bf433f5a3 --- /dev/null +++ b/dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql @@ -0,0 +1 @@ +SELECT a FROM (SELECT 1 AS a, (SELECT count() FROM system.numbers) AS b); From 87e72a9941b7b0c343d8200364ae36239e569d42 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 26 Mar 2019 20:27:03 +0300 Subject: [PATCH 40/78] Removed bug (already fixed) #4108 --- dbms/tests/queries/bugs/all_join.sql | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 dbms/tests/queries/bugs/all_join.sql diff 
--git a/dbms/tests/queries/bugs/all_join.sql b/dbms/tests/queries/bugs/all_join.sql deleted file mode 100644 index 8b63adf36f0..00000000000 --- a/dbms/tests/queries/bugs/all_join.sql +++ /dev/null @@ -1,24 +0,0 @@ -drop table if exists test.persons; -drop table if exists test.children; - -create table test.persons ( - id String, - name String -) engine MergeTree order by id; - -create table test.children ( - id String, - childName String -) engine MergeTree order by id; - -insert into test.persons (id, name) values ('1', 'John'), ('2', 'Jack'), ('3', 'Daniel'), ('4', 'James'), ('5', 'Amanda'); -insert into test.children (id, childName) values ('1', 'Robert'), ('1', 'Susan'), ('3', 'Sarah'), ('4', 'David'), ('4', 'Joseph'), ('5', 'Robert'); - - -select * from test.persons all inner join test.children using id; - -select * from test.persons all inner join (select * from test.children) as j using id; - -select * from (select * from test.persons) as s all inner join (select * from test.children) as j using id; - - From 9e263fb0cace573d508d52bc2e0421fd4cc3afe5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 26 Mar 2019 22:23:12 +0300 Subject: [PATCH 41/78] Updated tests, part 1 #4782 --- .../00061_merge_tree_alter.reference | 20 +++++++++---------- ...80_show_tables_and_system_tables.reference | 4 ++-- ...00564_temporary_table_management.reference | 2 +- .../00599_create_view_with_subquery.reference | 2 +- .../queries/0_stateless/00642_cast.reference | 2 +- .../00725_comment_columns.reference | 12 +++++------ .../00725_ipv4_ipv6_domains.reference | 4 ++-- ...51_default_databasename_for_view.reference | 2 +- .../00754_alter_modify_order_by.reference | 2 +- ...4_test_custom_compression_codecs.reference | 4 ++-- ...m_compression_codes_log_storages.reference | 8 ++++---- .../0_stateless/00836_indices_alter.reference | 10 +++++----- .../queries/0_stateless/00902_entropy.sql | 5 ----- .../00916_create_or_replace_view.reference | 4 ++-- 14 files changed, 38 
insertions(+), 43 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference b/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference index 21d00634771..ac7b240f4d5 100644 --- a/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference +++ b/dbms/tests/queries/0_stateless/00061_merge_tree_alter.reference @@ -1,14 +1,14 @@ d Date k UInt64 i32 Int32 -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 10 42 d Date k UInt64 i32 Int32 n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 8 40 [1,2,3] ['12','13','14'] 2015-01-01 10 42 [] [] d Date @@ -17,7 +17,7 @@ i32 Int32 n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 10 42 [] [] [] @@ -28,7 +28,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), `s` String DEFAULT \'0\') ENGINE = 
MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 @@ -39,7 +39,7 @@ i32 Int32 n.ui8 Array(UInt8) n.s Array(String) s Int64 -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `s` Int64) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 @@ -51,7 +51,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 n.d Array(Date) -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `n.ui8` Array(UInt8), `n.s` Array(String), `s` UInt32, `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -65,7 +65,7 @@ k UInt64 i32 Int32 n.s Array(String) s UInt32 -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, `n.s` Array(String), s UInt32) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `n.s` Array(String), `s` UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 ['120','130','140'] 0 2015-01-01 8 40 ['12','13','14'] 0 @@ -74,7 +74,7 @@ d Date k UInt64 i32 Int32 s UInt32 -CREATE TABLE test.alter ( d Date, k UInt64, i32 
Int32, s UInt32) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `s` UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 @@ -85,7 +85,7 @@ i32 Int32 s UInt32 n.s Array(String) n.d Array(Date) -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `s` UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 [] [] 2015-01-01 7 39 0 [] [] 2015-01-01 8 40 0 [] [] @@ -94,7 +94,7 @@ d Date k UInt64 i32 Int32 s UInt32 -CREATE TABLE test.alter ( d Date, k UInt64, i32 Int32, s UInt32) ENGINE = MergeTree(d, k, 8192) +CREATE TABLE test.alter (`d` Date, `k` UInt64, `i32` Int32, `s` UInt32) ENGINE = MergeTree(d, k, 8192) 2015-01-01 6 38 100500 2015-01-01 7 39 0 2015-01-01 8 40 0 diff --git a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference index bcef36e246a..a1ceaf94bfa 100644 --- a/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference +++ b/dbms/tests/queries/0_stateless/00080_show_tables_and_system_tables.reference @@ -1,7 +1,7 @@ A B -A 1 TinyLog CREATE TABLE test_show_tables.A ( A UInt8) ENGINE = TinyLog -B 1 TinyLog CREATE TABLE test_show_tables.B ( A UInt8) ENGINE = TinyLog +A 1 TinyLog CREATE TABLE test_show_tables.A (`A` UInt8) ENGINE = TinyLog +B 1 TinyLog CREATE TABLE test_show_tables.B (`A` UInt8) ENGINE = TinyLog test_temporary_table ['test_show_tables'] ['test_materialized'] 0 diff --git a/dbms/tests/queries/0_stateless/00564_temporary_table_management.reference b/dbms/tests/queries/0_stateless/00564_temporary_table_management.reference index 826f088fc74..edd17b9ea39 100644 --- a/dbms/tests/queries/0_stateless/00564_temporary_table_management.reference +++ 
b/dbms/tests/queries/0_stateless/00564_temporary_table_management.reference @@ -1,4 +1,4 @@ 1 -CREATE TEMPORARY TABLE temp_tab ( number UInt64) ENGINE = Memory +CREATE TEMPORARY TABLE temp_tab (`number` UInt64) ENGINE = Memory temp_tab 0 diff --git a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference index 5d822605733..1f0c8151996 100644 --- a/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference +++ b/dbms/tests/queries/0_stateless/00599_create_view_with_subquery.reference @@ -1 +1 @@ -CREATE VIEW test.test_view ( id UInt64) AS SELECT * FROM test.test WHERE id = (SELECT 1) +CREATE VIEW test.test_view (`id` UInt64) AS SELECT * FROM test.test WHERE id = (SELECT 1) diff --git a/dbms/tests/queries/0_stateless/00642_cast.reference b/dbms/tests/queries/0_stateless/00642_cast.reference index 58518327779..6a845ada2a0 100644 --- a/dbms/tests/queries/0_stateless/00642_cast.reference +++ b/dbms/tests/queries/0_stateless/00642_cast.reference @@ -7,7 +7,7 @@ hello hello hello 1970-01-01 00:00:01 -CREATE TABLE test.cast ( x UInt8, e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 +CREATE TABLE test.cast (`x` UInt8, `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 x UInt8 e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/dbms/tests/queries/0_stateless/00725_comment_columns.reference b/dbms/tests/queries/0_stateless/00725_comment_columns.reference index 77b7c9415af..058a7ed5e66 100644 --- a/dbms/tests/queries/0_stateless/00725_comment_columns.reference +++ b/dbms/tests/queries/0_stateless/00725_comment_columns.reference @@ -1,4 +1,4 @@ -CREATE TABLE test.check_query_comment_column ( 
first_column UInt8 DEFAULT 1 COMMENT \'comment 1\', second_column UInt8 MATERIALIZED first_column COMMENT \'comment 2\', third_column UInt8 ALIAS second_column COMMENT \'comment 3\', fourth_column UInt8 COMMENT \'comment 4\', fifth_column UInt8) ENGINE = TinyLog +CREATE TABLE test.check_query_comment_column (`first_column` UInt8 DEFAULT 1 COMMENT \'comment 1\', `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2\', `third_column` UInt8 ALIAS second_column COMMENT \'comment 3\', `fourth_column` UInt8 COMMENT \'comment 4\', `fifth_column` UInt8) ENGINE = TinyLog first_column UInt8 DEFAULT 1 comment 1 second_column UInt8 MATERIALIZED first_column comment 2 third_column UInt8 ALIAS second_column comment 3 @@ -11,7 +11,7 @@ fifth_column UInt8 │ check_query_comment_column │ fourth_column │ comment 4 │ │ check_query_comment_column │ fifth_column │ │ └────────────────────────────┴───────────────┴───────────┘ -CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'comment 1_1\', second_column UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\', third_column UInt8 ALIAS second_column COMMENT \'comment 3_1\', fourth_column UInt8 COMMENT \'comment 4_1\', fifth_column UInt8 COMMENT \'comment 5_1\') ENGINE = TinyLog +CREATE TABLE test.check_query_comment_column (`first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_1\', `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_1\', `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_1\', `fourth_column` UInt8 COMMENT \'comment 4_1\', `fifth_column` UInt8 COMMENT \'comment 5_1\') ENGINE = TinyLog ┌─table──────────────────────┬─name──────────┬─comment─────┐ │ check_query_comment_column │ first_column │ comment 1_2 │ │ check_query_comment_column │ second_column │ comment 2_2 │ @@ -19,8 +19,8 @@ CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMM │ check_query_comment_column │ fourth_column │ comment 4_2 │ │ check_query_comment_column │ 
fifth_column │ comment 5_2 │ └────────────────────────────┴───────────────┴─────────────┘ -CREATE TABLE test.check_query_comment_column ( first_column UInt8 DEFAULT 1 COMMENT \'comment 1_2\', second_column UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\', third_column UInt8 ALIAS second_column COMMENT \'comment 3_2\', fourth_column UInt8 COMMENT \'comment 4_2\', fifth_column UInt8 COMMENT \'comment 5_2\') ENGINE = TinyLog -CREATE TABLE test.check_query_comment_column ( first_column UInt8 COMMENT \'comment 1\', second_column UInt8 COMMENT \'comment 2\', third_column UInt8 COMMENT \'comment 3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +CREATE TABLE test.check_query_comment_column (`first_column` UInt8 DEFAULT 1 COMMENT \'comment 1_2\', `second_column` UInt8 MATERIALIZED first_column COMMENT \'comment 2_2\', `third_column` UInt8 ALIAS second_column COMMENT \'comment 3_2\', `fourth_column` UInt8 COMMENT \'comment 4_2\', `fifth_column` UInt8 COMMENT \'comment 5_2\') ENGINE = TinyLog +CREATE TABLE test.check_query_comment_column (`first_column` UInt8 COMMENT \'comment 1\', `second_column` UInt8 COMMENT \'comment 2\', `third_column` UInt8 COMMENT \'comment 3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 first_column UInt8 comment 1 second_column UInt8 comment 2 third_column UInt8 comment 3 @@ -29,8 +29,8 @@ third_column UInt8 comment 3 │ check_query_comment_column │ second_column │ comment 2 │ │ check_query_comment_column │ third_column │ comment 3 │ └────────────────────────────┴───────────────┴───────────┘ -CREATE TABLE test.check_query_comment_column ( first_column UInt8 COMMENT \'comment 1_2\', second_column UInt8 COMMENT \'comment 2_2\', third_column UInt8 COMMENT \'comment 3_2\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS 
index_granularity = 8192 -CREATE TABLE test.check_query_comment_column ( first_column UInt8 COMMENT \'comment 1_3\', second_column UInt8 COMMENT \'comment 2_3\', third_column UInt8 COMMENT \'comment 3_3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +CREATE TABLE test.check_query_comment_column (`first_column` UInt8 COMMENT \'comment 1_2\', `second_column` UInt8 COMMENT \'comment 2_2\', `third_column` UInt8 COMMENT \'comment 3_2\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 +CREATE TABLE test.check_query_comment_column (`first_column` UInt8 COMMENT \'comment 1_3\', `second_column` UInt8 COMMENT \'comment 2_3\', `third_column` UInt8 COMMENT \'comment 3_3\') ENGINE = MergeTree() PARTITION BY second_column ORDER BY first_column SAMPLE BY first_column SETTINGS index_granularity = 8192 ┌─table──────────────────────┬─name──────────┬─comment─────┐ │ check_query_comment_column │ first_column │ comment 1_3 │ │ check_query_comment_column │ second_column │ comment 2_3 │ diff --git a/dbms/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/dbms/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference index 9478e02fcaf..5ca561858e6 100644 --- a/dbms/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference +++ b/dbms/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference @@ -1,4 +1,4 @@ -CREATE TABLE test.ipv4_test ( ipv4_ IPv4) ENGINE = Memory +CREATE TABLE test.ipv4_test (`ipv4_` IPv4) ENGINE = Memory 0.0.0.0 00 8.8.8.8 08080808 127.0.0.1 7F000001 @@ -10,7 +10,7 @@ CREATE TABLE test.ipv4_test ( ipv4_ IPv4) ENGINE = Memory > 127.0.0.1 255.255.255.255 = 127.0.0.1 127.0.0.1 euqality of IPv4-mapped IPv6 value and IPv4 promoted to IPv6 with function: 1 -CREATE TABLE test.ipv6_test ( ipv6_ IPv6) ENGINE = Memory +CREATE TABLE test.ipv6_test (`ipv6_` IPv6) ENGINE = Memory :: 
00000000000000000000000000000000 :: 00000000000000000000000000000000 ::ffff:8.8.8.8 00000000000000000000FFFF08080808 diff --git a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference index a6aa649ae54..a7dc6f0b32a 100644 --- a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test.t_mv ( date Date, platform Enum8('a' = 0, 'b' = 1), app Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t WHERE (app = (SELECT min(app) FROM test.u )) AND (platform = (SELECT (SELECT min(platform) FROM test.v ))) +CREATE MATERIALIZED VIEW test.t_mv (`date` Date, `platform` Enum8('a' = 0, 'b' = 1), `app` Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t WHERE (app = (SELECT min(app) FROM test.u )) AND (platform = (SELECT (SELECT min(platform) FROM test.v ))) 2000-01-01 a a 2000-01-02 b b 2000-01-03 a a diff --git a/dbms/tests/queries/0_stateless/00754_alter_modify_order_by.reference b/dbms/tests/queries/0_stateless/00754_alter_modify_order_by.reference index b63ec7d0c42..876c5c9626f 100644 --- a/dbms/tests/queries/0_stateless/00754_alter_modify_order_by.reference +++ b/dbms/tests/queries/0_stateless/00754_alter_modify_order_by.reference @@ -9,4 +9,4 @@ 1 2 1 30 1 2 4 90 *** Check SHOW CREATE TABLE *** -CREATE TABLE test.summing ( x UInt32, y UInt32, z UInt32, val UInt32) ENGINE = SummingMergeTree PRIMARY KEY (x, y) ORDER BY (x, y, -z) SETTINGS index_granularity = 8192 +CREATE TABLE test.summing (`x` UInt32, `y` UInt32, `z` UInt32, `val` UInt32) ENGINE = SummingMergeTree PRIMARY KEY (x, y) ORDER BY (x, y, -z) SETTINGS index_granularity = 8192 diff --git 
a/dbms/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference b/dbms/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference index acb8d0e429b..fb5468664a0 100644 --- a/dbms/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference +++ b/dbms/tests/queries/0_stateless/00804_test_custom_compression_codecs.reference @@ -9,10 +9,10 @@ 10003 274972506.6 9175437371954010821 -CREATE TABLE test.compression_codec_multiple_more_types ( id Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), data FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE test.compression_codec_multiple_more_types (`id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1000000000000 xxxxxxxxxxxx [127] ['Henry'] ! 
222 !ZSTD -CREATE TABLE test.test_default_delta ( id UInt64 CODEC(Delta(8)), data String CODEC(Delta(1)), somedate Date CODEC(Delta(2)), somenum Float64 CODEC(Delta(8)), somestr FixedString(3) CODEC(Delta(1)), othernum Int64 CODEC(Delta(8)), yetothernum Float32 CODEC(Delta(4)), `ddd.age` Array(UInt8) CODEC(Delta(1)), `ddd.Name` Array(String) CODEC(Delta(1)), `ddd.OName` Array(String) CODEC(Delta(1)), `ddd.BName` Array(String) CODEC(Delta(1))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE test.test_default_delta (`id` UInt64 CODEC(Delta(8)), `data` String CODEC(Delta(1)), `somedate` Date CODEC(Delta(2)), `somenum` Float64 CODEC(Delta(8)), `somestr` FixedString(3) CODEC(Delta(1)), `othernum` Int64 CODEC(Delta(8)), `yetothernum` Float32 CODEC(Delta(4)), `ddd.age` Array(UInt8) CODEC(Delta(1)), `ddd.Name` Array(String) CODEC(Delta(1)), `ddd.OName` Array(String) CODEC(Delta(1)), `ddd.BName` Array(String) CODEC(Delta(1))) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192 diff --git a/dbms/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference b/dbms/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference index 8104bd3831f..6b9d1a71d75 100644 --- a/dbms/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference +++ b/dbms/tests/queries/0_stateless/00804_test_custom_compression_codes_log_storages.reference @@ -1,9 +1,9 @@ -CREATE TABLE test.compression_codec_log ( id UInt64 CODEC(LZ4), data String CODEC(ZSTD(1)), ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), othernum Int64 CODEC(Delta(8))) ENGINE = Log() +CREATE TABLE test.compression_codec_log (`id` UInt64 CODEC(LZ4), `data` String CODEC(ZSTD(1)), `ddd` Date CODEC(NONE), `somenum` Float64 CODEC(ZSTD(2)), `somestr` FixedString(3) CODEC(LZ4HC(7)), `othernum` Int64 CODEC(Delta(8))) ENGINE = Log() 1 hello 2018-12-14 1.1 aaa 5 2 world 
2018-12-15 2.2 bbb 6 3 ! 2018-12-16 3.3 ccc 7 2 -CREATE TABLE test.compression_codec_multiple_log ( id UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))) ENGINE = Log() +CREATE TABLE test.compression_codec_multiple_log (`id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))) ENGINE = Log() 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 @@ -11,12 +11,12 @@ CREATE TABLE test.compression_codec_multiple_log ( id UInt64 CODEC(LZ4, ZSTD(1), 10003 274972506.6 9175437371954010821 -CREATE TABLE test.compression_codec_tiny_log ( id UInt64 CODEC(LZ4), data String CODEC(ZSTD(1)), ddd Date CODEC(NONE), somenum Float64 CODEC(ZSTD(2)), somestr FixedString(3) CODEC(LZ4HC(7)), othernum Int64 CODEC(Delta(8))) ENGINE = TinyLog() +CREATE TABLE test.compression_codec_tiny_log (`id` UInt64 CODEC(LZ4), `data` String CODEC(ZSTD(1)), `ddd` Date CODEC(NONE), `somenum` Float64 CODEC(ZSTD(2)), `somestr` FixedString(3) CODEC(LZ4HC(7)), `othernum` Int64 CODEC(Delta(8))) ENGINE = TinyLog() 1 hello 2018-12-14 1.1 aaa 5 2 world 2018-12-15 2.2 bbb 6 3 ! 
2018-12-16 3.3 ccc 7 2 -CREATE TABLE test.compression_codec_multiple_tiny_log ( id UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), data String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), ddd Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), somenum Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))) ENGINE = TinyLog() +CREATE TABLE test.compression_codec_multiple_tiny_log (`id` UInt64 CODEC(LZ4, ZSTD(1), NONE, LZ4HC(0), Delta(4)), `data` String CODEC(ZSTD(2), NONE, Delta(2), LZ4HC(0), LZ4, LZ4, Delta(8)), `ddd` Date CODEC(NONE, NONE, NONE, Delta(1), LZ4, ZSTD(1), LZ4HC(0), LZ4HC(0)), `somenum` Float64 CODEC(Delta(4), LZ4, LZ4, ZSTD(2), LZ4HC(5), ZSTD(3), ZSTD(1))) ENGINE = TinyLog() 1 world 2018-10-05 1.1 2 hello 2018-10-01 2.2 3 buy 2018-10-11 3.3 diff --git a/dbms/tests/queries/0_stateless/00836_indices_alter.reference b/dbms/tests/queries/0_stateless/00836_indices_alter.reference index 43447d944de..e41e349c06c 100644 --- a/dbms/tests/queries/0_stateless/00836_indices_alter.reference +++ b/dbms/tests/queries/0_stateless/00836_indices_alter.reference @@ -1,4 +1,4 @@ -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -6,15 +6,15 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYP 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY 
u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -23,6 +23,6 @@ CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYP 1 2 1 2 1 2 -CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2 (`u64` UInt64, `i32` Int32) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 diff --git a/dbms/tests/queries/0_stateless/00902_entropy.sql b/dbms/tests/queries/0_stateless/00902_entropy.sql index 30cc2c51f77..eef1f9b829c 100644 --- a/dbms/tests/queries/0_stateless/00902_entropy.sql +++ b/dbms/tests/queries/0_stateless/00902_entropy.sql @@ -1,4 +1,3 @@ -CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults ( @@ -9,7 +8,6 @@ insert into test.defaults values ('ba'), ('aa'), ('ba'), ('b'), ('ba'), ('aa'); select val < 1.5 and val > 1.459 from (select entropy(vals) as val from test.defaults); -CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults ( @@ -19,7 +17,6 @@ insert into test.defaults 
values (0), (0), (1), (0), (0), (0), (1), (2), (3), (5 select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); -CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults ( @@ -29,7 +26,6 @@ insert into test.defaults values (0), (0), (1), (0), (0), (0), (1), (2), (3), (5 select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); -CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults ( @@ -39,7 +35,6 @@ insert into test.defaults values (0), (0), (-1), (0), (0), (0), (-1), (2), (3), select val < 2.4 and val > 2.3393 from (select entropy(vals) as val from test.defaults); -CREATE DATABASE IF NOT EXISTS test; DROP TABLE IF EXISTS test.defaults; CREATE TABLE IF NOT EXISTS test.defaults ( diff --git a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference index c8e912a9a45..f8b7469aa64 100644 --- a/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference +++ b/dbms/tests/queries/0_stateless/00916_create_or_replace_view.reference @@ -1,2 +1,2 @@ -CREATE VIEW test.t ( number UInt64) AS SELECT number FROM system.numbers -CREATE VIEW test.t ( next_number UInt64) AS SELECT number + 1 AS next_number FROM system.numbers +CREATE VIEW test.t (`number` UInt64) AS SELECT number FROM system.numbers +CREATE VIEW test.t (`next_number` UInt64) AS SELECT number + 1 AS next_number FROM system.numbers From 0a0e986f7531d5d85c7275587d1ad5982a8235c7 Mon Sep 17 00:00:00 2001 From: chertus Date: Tue, 26 Mar 2019 22:46:03 +0300 Subject: [PATCH 42/78] fix crash on join not nullable with nullable --- dbms/src/Interpreters/Join.cpp | 182 +++++++++--------- .../00848_join_use_nulls_segfault.reference | 44 +++++ .../00848_join_use_nulls_segfault.sql | 69 +++++++ 3 files changed, 203 insertions(+), 92 deletions(-) create mode 
100644 dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference create mode 100644 dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 3336f175ebe..cf603b4efbd 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -32,19 +32,17 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } -static NameSet requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) -{ - NameSet required; +static std::unordered_map requiredRightKeys(const Names & key_names, const NamesAndTypesList & columns_added_by_join) +{ NameSet right_keys; for (const auto & name : key_names) right_keys.insert(name); + std::unordered_map required; for (const auto & column : columns_added_by_join) - { if (right_keys.count(column.name)) - required.insert(column.name); - } + required.insert({column.name, column.type}); return required; } @@ -218,11 +216,22 @@ size_t Join::getTotalByteCount() const static void convertColumnToNullable(ColumnWithTypeAndName & column) { + if (column.type->isNullable()) + return; + column.type = makeNullable(column.type); if (column.column) column.column = makeNullable(column.column); } +/// Converts column to nullable if needed. No backward convertion. 
+ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable) +{ + if (nullable) + convertColumnToNullable(column); + return column; +} + void Join::setSampleBlock(const Block & block) { @@ -720,7 +729,7 @@ void Join::joinBlockImpl( /// Filter & insert missing rows - NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join); if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) { @@ -737,10 +746,12 @@ void Join::joinBlockImpl( auto & right_name = key_names_right[i]; auto & left_name = key_names_left[i]; - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !block.has(right_name)) { const auto & col = block.getByName(left_name); - block.insert({col.column, col.type, right_name}); + bool is_nullable = it->second->isNullable(); + block.insert(correctNullability({col.column, col.type, right_name}, is_nullable)); } } } @@ -752,7 +763,8 @@ void Join::joinBlockImpl( auto & right_name = key_names_right[i]; auto & left_name = key_names_left[i]; - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !block.has(right_name)) { const auto & col = block.getByName(left_name); ColumnPtr column = col.column->convertToFullColumnIfConst(); @@ -766,13 +778,15 @@ void Join::joinBlockImpl( mut_column->insertDefault(); } - block.insert({std::move(mut_column), col.type, right_name}); + bool is_nullable = use_nulls || it->second->isNullable(); + block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable)); } } } } else { + constexpr bool left_or_full = static_in_v; if (!offsets_to_replicate) throw Exception("No data to filter columns", ErrorCodes::LOGICAL_ERROR); @@ -782,7 +796,8 @@ void Join::joinBlockImpl( auto & right_name = 
key_names_right[i]; auto & left_name = key_names_left[i]; - if (needed_key_names_right.count(right_name) && !block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !block.has(right_name)) { const auto & col = block.getByName(left_name); ColumnPtr column = col.column->convertToFullColumnIfConst(); @@ -803,7 +818,8 @@ void Join::joinBlockImpl( last_offset = (*offsets_to_replicate)[row]; } - block.insert({std::move(mut_column), col.type, right_name}); + bool is_nullable = (use_nulls && left_or_full) || it->second->isNullable(); + block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable)); } } @@ -997,11 +1013,8 @@ struct AdderNonJoined; template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) { - for (size_t j = 0; j < columns_left.size(); ++j) - columns_left[j]->insertDefault(); - for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*mapped.block->getByPosition(j).column.get(), mapped.row_num); @@ -1012,13 +1025,10 @@ struct AdderNonJoined template struct AdderNonJoined { - static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_left, MutableColumns & columns_right) + static void add(const Mapped & mapped, size_t & rows_added, MutableColumns & columns_right) { for (auto current = &static_cast(mapped); current != nullptr; current = current->next) { - for (size_t j = 0; j < columns_left.size(); ++j) - columns_left[j]->insertDefault(); - for (size_t j = 0; j < columns_right.size(); ++j) columns_right[j]->insertFrom(*current->block->getByPosition(j).column.get(), current->row_num); @@ -1040,53 +1050,51 @@ public: * result_sample_block - keys, "left" columns, and "right" columns. 
*/ - std::unordered_map key_renames; - makeResultSampleBlock(left_sample_block, key_names_left, columns_added_by_join, key_renames); - - const Block & right_sample_block = parent.sample_block_with_columns_to_add; - - size_t num_keys = key_names_left.size(); - size_t num_columns_left = left_sample_block.columns() - num_keys; - size_t num_columns_right = right_sample_block.columns(); - - column_indices_left.reserve(num_columns_left); - column_indices_keys_and_right.reserve(num_keys + num_columns_right); - std::vector is_left_key(left_sample_block.columns(), false); + std::vector key_positions_left; + key_positions_left.reserve(key_names_left.size()); for (const std::string & key : key_names_left) { size_t key_pos = left_sample_block.getPositionByName(key); + key_positions_left.push_back(key_pos); is_left_key[key_pos] = true; + } + + const Block & right_sample_block = parent.sample_block_with_columns_to_add; + + std::unordered_map left_to_right_key_map; + makeResultSampleBlock(left_sample_block, right_sample_block, columns_added_by_join, + key_positions_left, is_left_key, left_to_right_key_map); + + column_indices_left.reserve(left_sample_block.columns() - key_names_left.size()); + column_indices_keys_and_right.reserve(key_names_left.size() + right_sample_block.columns()); + + /// Use right key columns if present. @note left & right key columns could have different nullability. + for (size_t key_pos : key_positions_left) + { /// Here we establish the mapping between key columns of the left- and right-side tables. /// key_pos index is inserted in the position corresponding to key column in parent.blocks /// (saved blocks of the right-side table) and points to the same key column /// in the left_sample_block and thus in the result_sample_block. 
- column_indices_keys_and_right.push_back(key_pos); - auto it = key_renames.find(key); - if (it != key_renames.end()) - key_renames_indices[key_pos] = result_sample_block.getPositionByName(it->second); - } - - size_t num_src_columns = left_sample_block.columns() + right_sample_block.columns(); - - for (size_t i = 0; i < result_sample_block.columns(); ++i) - { - if (i < left_sample_block.columns()) + auto it = left_to_right_key_map.find(key_pos); + if (it != left_to_right_key_map.end()) { - if (!is_left_key[i]) - { - column_indices_left.emplace_back(i); - - /// If use_nulls, convert left columns to Nullable. - if (parent.use_nulls) - convertColumnToNullable(result_sample_block.getByPosition(i)); - } + column_indices_keys_and_right.push_back(it->second); + column_indices_left.push_back(key_pos); } - else if (i < num_src_columns) - column_indices_keys_and_right.emplace_back(i); + else + column_indices_keys_and_right.push_back(key_pos); } + + for (size_t i = 0; i < left_sample_block.columns(); ++i) + if (!is_left_key[i]) + column_indices_left.emplace_back(i); + + size_t num_additional_keys = left_to_right_key_map.size(); + for (size_t i = left_sample_block.columns(); i < result_sample_block.columns() - num_additional_keys; ++i) + column_indices_keys_and_right.emplace_back(i); } String getName() const override { return "NonJoined"; } @@ -1118,18 +1126,25 @@ private: /// Indices of key columns in result_sample_block or columns that come from the right-side table. /// Order is significant: it is the same as the order of columns in the blocks of the right-side table that are saved in parent.blocks. 
ColumnNumbers column_indices_keys_and_right; - std::unordered_map key_renames_indices; std::unique_ptr> position; /// type erasure - void makeResultSampleBlock(const Block & left_sample_block, const Names & key_names_left, - const NamesAndTypesList & columns_added_by_join, std::unordered_map & key_renames) + void makeResultSampleBlock(const Block & left_sample_block, const Block & right_sample_block, + const NamesAndTypesList & columns_added_by_join, + const std::vector & key_positions_left, const std::vector & is_left_key, + std::unordered_map & left_to_right_key_map) { - const Block & right_sample_block = parent.sample_block_with_columns_to_add; - result_sample_block = materializeBlock(left_sample_block); + /// Convert left columns to Nullable if allowed + if (parent.use_nulls) + { + for (size_t i = 0; i < result_sample_block.columns(); ++i) + if (!is_left_key[i]) + convertColumnToNullable(result_sample_block.getByPosition(i)); + } + /// Add columns from the right-side table to the block. for (size_t i = 0; i < right_sample_block.columns(); ++i) { @@ -1139,20 +1154,23 @@ private: } const auto & key_names_right = parent.key_names_right; - NameSet needed_key_names_right = requiredRightKeys(key_names_right, columns_added_by_join); + auto right_keys = requiredRightKeys(key_names_right, columns_added_by_join); /// Add join key columns from right block if they has different name. 
for (size_t i = 0; i < key_names_right.size(); ++i) { auto & right_name = key_names_right[i]; - auto & left_name = key_names_left[i]; + size_t left_key_pos = key_positions_left[i]; - if (needed_key_names_right.count(right_name) && !result_sample_block.has(right_name)) + auto it = right_keys.find(right_name); + if (it != right_keys.end() && !result_sample_block.has(right_name)) { - const auto & col = result_sample_block.getByName(left_name); - result_sample_block.insert({col.column, col.type, right_name}); + const auto & col = result_sample_block.getByPosition(left_key_pos); + bool is_nullable = (parent.use_nulls && isFull(parent.kind)) || it->second->isNullable(); + result_sample_block.insert(correctNullability({col.column, col.type, right_name}, is_nullable)); - key_renames[left_name] = right_name; + size_t right_key_pos = result_sample_block.getPositionByName(right_name); + left_to_right_key_map[left_key_pos] = right_key_pos; } } } @@ -1169,7 +1187,7 @@ private: { #define M(TYPE) \ case Join::Type::TYPE: \ - rows_added = fillColumns(*maps.TYPE, columns_left, columns_keys_and_right); \ + rows_added = fillColumns(*maps.TYPE, columns_keys_and_right); \ break; APPLY_FOR_JOIN_VARIANTS(M) #undef M @@ -1183,32 +1201,12 @@ private: Block res = result_sample_block.cloneEmpty(); + /// @note it's possible to make ColumnConst here and materialize it later for (size_t i = 0; i < columns_left.size(); ++i) - res.getByPosition(column_indices_left[i]).column = std::move(columns_left[i]); + res.getByPosition(column_indices_left[i]).column = columns_left[i]->cloneResized(rows_added); - if (key_renames_indices.empty()) - { - for (size_t i = 0; i < columns_keys_and_right.size(); ++i) - res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); - } - else - { - for (size_t i = 0; i < columns_keys_and_right.size(); ++i) - { - size_t key_idx = column_indices_keys_and_right[i]; - - auto it = key_renames_indices.find(key_idx); - if (it != 
key_renames_indices.end()) - { - auto & key_column = res.getByPosition(key_idx).column; - if (key_column->empty()) - key_column = key_column->cloneResized(columns_keys_and_right[i]->size()); - res.getByPosition(it->second).column = std::move(columns_keys_and_right[i]); - } - else - res.getByPosition(key_idx).column = std::move(columns_keys_and_right[i]); - } - } + for (size_t i = 0; i < columns_keys_and_right.size(); ++i) + res.getByPosition(column_indices_keys_and_right[i]).column = std::move(columns_keys_and_right[i]); return res; } @@ -1230,7 +1228,7 @@ private: } template - size_t fillColumns(const Map & map, MutableColumns & columns_left, MutableColumns & columns_keys_and_right) + size_t fillColumns(const Map & map, MutableColumns & columns_keys_and_right) { size_t rows_added = 0; @@ -1247,7 +1245,7 @@ private: if (it->getSecond().getUsed()) continue; - AdderNonJoined::add(it->getSecond(), rows_added, columns_left, columns_keys_and_right); + AdderNonJoined::add(it->getSecond(), rows_added, columns_keys_and_right); if (rows_added >= max_block_size) { diff --git a/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference new file mode 100644 index 00000000000..f8527a732c5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference @@ -0,0 +1,44 @@ +on +l \N String Nullable(String) +l \N String Nullable(String) + r \N String Nullable(String) +\N r \N Nullable(String) Nullable(String) +l \N String Nullable(String) +l \N String Nullable(String) + r \N String Nullable(String) +\N r \N Nullable(String) Nullable(String) +0 \N +0 \N +using +l \N String Nullable(String) +l \N String Nullable(String) + \N String Nullable(String) +\N \N Nullable(String) Nullable(String) +l \N String Nullable(String) +l \N String Nullable(String) + \N String Nullable(String) +\N \N Nullable(String) Nullable(String) +0 \N +0 \N +on + join_use_nulls +l \N TODO Nullable(String) 
+l \N TODO Nullable(String) + r \N TODO Nullable(String) +\N r \N Nullable(String) Nullable(String) +l \N TODO Nullable(String) +l \N TODO Nullable(String) + r \N TODO Nullable(String) +\N r \N Nullable(String) Nullable(String) +0 \N +0 \N +using + join_use_nulls +l \N TODO Nullable(String) +l \N TODO Nullable(String) + \N TODO Nullable(String) +\N \N Nullable(String) Nullable(String) +l \N TODO Nullable(String) +l \N TODO Nullable(String) + \N TODO Nullable(String) +\N \N Nullable(String) Nullable(String) +0 \N +0 \N diff --git a/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql new file mode 100644 index 00000000000..b38ac854bc7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.sql @@ -0,0 +1,69 @@ +USE test; + +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +DROP TABLE IF EXISTS t3; +CREATE TABLE t1 ( id String ) ENGINE = Memory; +CREATE TABLE t2 ( id Nullable(String) ) ENGINE = Memory; +CREATE TABLE t3 ( id Nullable(String), not_id Nullable(String) ) ENGINE = Memory; + +insert into t1 values ('l'); +insert into t3 (id) values ('r'); + +SELECT 'on'; + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 ON t2.id = t3.id; + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 ON t2.id = t3.id; + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 ON t1.id = t3.id; + +SELECT 'using'; + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT *, 
toTypeName(t1.id), toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 USING(id); + +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 LEFT JOIN t3 USING(id); +SELECT *, toTypeName(t1.id), toTypeName(t3.id) FROM t1 FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 USING(id); + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 USING(id); + +SET join_use_nulls = 1; +-- TODO: toTypeName(t1.id) String -> Nullable(String) + +SELECT 'on + join_use_nulls'; + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 ON t2.id = t3.id; + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 LEFT JOIN t3 ON t1.id = t3.id; +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 FULL JOIN t3 ON t1.id = t3.id; +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 ON t2.id = t3.id; + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 ON t1.id = t3.id; +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 ON t1.id = t3.id; + +SELECT 'using + join_use_nulls'; + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 ANY FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 ANY FULL JOIN t3 USING(id); + +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 LEFT JOIN t3 USING(id); +SELECT *, 'TODO', toTypeName(t3.id) FROM t1 FULL JOIN t3 USING(id); +SELECT *, toTypeName(t2.id), toTypeName(t3.id) FROM t2 FULL JOIN t3 USING(id); + +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 ANY LEFT JOIN t3 USING(id); +SELECT t3.id = 'l', t3.not_id = 'l' FROM t1 LEFT JOIN t3 USING(id); + +DROP TABLE t1; +DROP TABLE t2; From 
42dd521e4522bb69632847988ee941eb3ffab982 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 27 Mar 2019 00:56:46 +0300 Subject: [PATCH 43/78] Added a setting to disallow Hyperscan --- dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Core/Settings.h | 4 +++- dbms/src/Functions/FunctionsStringSearch.cpp | 4 ++++ dbms/src/Functions/FunctionsStringSearch.h | 11 ++++++++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index f974b2bdaf6..141fc9007b8 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -420,6 +420,7 @@ namespace ErrorCodes extern const int NO_COMMON_COLUMNS_WITH_PROTOBUF_SCHEMA = 443; extern const int UNKNOWN_PROTOBUF_FORMAT = 444; extern const int CANNOT_MPROTECT = 445; + extern const int FUNCTION_NOT_ALLOWED = 446; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 3f71e8af4c0..de7858d227f 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -305,7 +305,9 @@ struct Settings M(SettingBool, allow_experimental_cross_to_join_conversion, true, "Convert CROSS JOIN to INNER JOIN if possible") \ M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.") \ M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. 
Currently supported only for 'mysql' table function.") \ - M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.")\ + M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.") \ + \ + M(SettingBool, allow_hyperscan, true, "Allow functions that use Hyperscan library. Disable to avoid potentially long compilation times and excessive resource usage.") \ #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ TYPE NAME {DEFAULT}; diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 7c22afc9020..a7096786dc8 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -340,6 +340,7 @@ template struct MultiSearchImpl { using ResultType = UInt8; + static constexpr bool is_using_hyperscan = false; static void vector_constant( const ColumnString::Chars & haystack_data, @@ -355,6 +356,7 @@ template struct MultiSearchFirstPositionImpl { using ResultType = UInt64; + static constexpr bool is_using_hyperscan = false; static void vector_constant( const ColumnString::Chars & haystack_data, @@ -374,6 +376,7 @@ template struct MultiSearchFirstIndexImpl { using ResultType = UInt64; + static constexpr bool is_using_hyperscan = false; static void vector_constant( const ColumnString::Chars & haystack_data, @@ -610,6 +613,7 @@ struct MultiMatchAnyImpl { static_assert(static_cast(FindAny) + static_cast(FindAnyIndex) == 1); using ResultType = Type; + static constexpr bool is_using_hyperscan = true; static void vector_constant( const ColumnString::Chars & haystack_data, diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index d6e1489a97e..89f51470da8 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ 
b/dbms/src/Functions/FunctionsStringSearch.h @@ -11,8 +11,10 @@ #include #include #include +#include #include + namespace DB { /** Search and replace functions in strings: @@ -67,6 +69,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_COLUMN; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int FUNCTION_NOT_ALLOWED; } template @@ -285,7 +288,13 @@ class FunctionsMultiStringSearch : public IFunction public: static constexpr auto name = Name::name; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static FunctionPtr create(const Context & context) + { + if (Impl::is_using_hyperscan && !context.getSettingsRef().allow_hyperscan) + throw Exception("Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0", ErrorCodes::FUNCTION_NOT_ALLOWED); + + return std::make_shared(); + } String getName() const override { return name; } From 874e06d284d6002621c35e758e27b628b8df451c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 27 Mar 2019 01:03:02 +0300 Subject: [PATCH 44/78] Added a test --- .../0_stateless/00927_disable_hyperscan.reference | 9 +++++++++ .../queries/0_stateless/00927_disable_hyperscan.sql | 6 ++++++ 2 files changed, 15 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00927_disable_hyperscan.reference create mode 100644 dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql diff --git a/dbms/tests/queries/0_stateless/00927_disable_hyperscan.reference b/dbms/tests/queries/0_stateless/00927_disable_hyperscan.reference new file mode 100644 index 00000000000..7cac306b9db --- /dev/null +++ b/dbms/tests/queries/0_stateless/00927_disable_hyperscan.reference @@ -0,0 +1,9 @@ +1 +1 +1 +1 +0 +1 +1 +1 +0 diff --git a/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql b/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql new file mode 100644 index 00000000000..1af9c129284 --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/00927_disable_hyperscan.sql @@ -0,0 +1,6 @@ +SET allow_hyperscan = 1; +SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); +SET allow_hyperscan = 0; +SELECT multiMatchAny(arrayJoin(['hello', 'world', 'hellllllllo', 'wororld', 'abc']), ['hel+o', 'w(or)*ld']); -- { serverError 446 } + +SELECT multiSearchAny(arrayJoin(['hello', 'world', 'hello, world', 'abc']), ['hello', 'world']); From e92dcd47741237d47dca6e259bcef7ee40980a99 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 27 Mar 2019 01:47:38 +0300 Subject: [PATCH 45/78] fix clang release build --- dbms/src/Interpreters/Join.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index cf603b4efbd..148b92aca7f 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -229,7 +229,7 @@ ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool n { if (nullable) convertColumnToNullable(column); - return column; + return std::move(column); } From 6e3f7f62ebd8944b6faff7d4ccacb675f43b816a Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 27 Mar 2019 13:13:46 +0300 Subject: [PATCH 46/78] DOCAPI-5755: EN review and RU translation for the enable_optimize_predicate_expression setting desctiprion (#4818) --- docs/en/operations/settings/settings.md | 14 ++++----- docs/ru/operations/settings/settings.md | 40 +++++++++++++++++++------ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 00c5d476771..17c134776fd 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -23,27 +23,27 @@ The possible values are: ## enable_optimize_predicate_expression -Turns on the predicate pushdown in `SELECT` queries. +Turns on predicate pushdown in `SELECT` queries. 
-Predicate pushdown may significantly reduce the network traffic for distributed queries. +Predicate pushdown may significantly reduce network traffic for distributed queries. Possible values: -- 0 — The functionality is turned off. -- 1 — The functionality is turned on. +- 0 — Functionality is turned off. +- 1 — Functionality is turned on. Default value: 0. **Usage** -Let's consider the following queries: +Consider the following queries: 1. `SELECT count() FROM test_table WHERE date = '2018-10-10'` 2. `SELECT count() FROM (SELECT * FROM test_table) WHERE date = '2018-10-10'` -If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal, because ClickHouse apply `WHERE` to subquery when processing it. +If `enable_optimize_predicate_expression = 1`, then the execution time of these queries is equal, because ClickHouse applies `WHERE` to the subquery when processing it. -If `enable_optimize_predicate_expression = 0`, then the second query is executed much slower, because the `WHERE` clause applies to all the data after the subquery finishes. +If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer, because the `WHERE` clause applies to all the data after the subquery finishes. ## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index ce8c52d86fa..c305831885e 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -20,7 +20,31 @@ ClickHouse применяет настройку в тех случаях, ко - `global` - заменяет запрос `IN` / `JOIN` на `GLOBAL IN` / `GLOBAL JOIN.` - `allow` - разрешает использование таких подзапросов. -## fallback_to_stale_replicas_for_distributed_queries +## enable_optimize_predicate_expression + +Включает пробрасывание предикатов в подзапросы для запросов `SELECT`. 
+ +Пробрасывание предикатов может существенно уменьшить сетевой трафик для распределенных запросов. + +Возможные значения: + +- 0 — функциональность выключена. +- 1 — функциональность включена. + +Значение по умолчанию: 0. + +**Использование** + +Рассмотрим следующие запросы: + +1. `SELECT count() FROM test_table WHERE date = '2018-10-10'` +2. `SELECT count() FROM (SELECT * FROM test_table) WHERE date = '2018-10-10'` + +Если `enable_optimize_predicate_expression = 1`, то время выполнения запросов одинаковое, так как ClickHouse применяет `WHERE` к подзапросу сразу при его обработке. + +Если `enable_optimize_predicate_expression = 0`, то время выполнения второго запроса намного больше, потому что секция `WHERE` применяется к данным уже после завершения подзапроса. + +## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries} Форсирует запрос в устаревшую реплику в случае, если актуальные данные недоступны. Смотрите "[Репликация](../../operations/table_engines/replication.md)". @@ -91,12 +115,12 @@ ClickHouse применяет настройку в тех случаях, ко ## join_use_nulls {#settings-join_use_nulls} -Устанавливает тип поведения [JOIN](../../query_language/select.md). При присоединении таблиц могут появляться пустые ячейки. ClickHouse заполняет их по-разному в зависимости от настройки. +Устанавливает тип поведения [JOIN](../../query_language/select.md). При объединении таблиц могут появиться пустые ячейки. ClickHouse заполняет их по-разному в зависимости от настроек. -**Допустимые значения** +**Возможные значения** -- 0 — пустые ячейки заполняются значением по умолчанию для типа соответствующего столбца. -- 1 — подведение `JOIN` такое же, как в стандартном SQL. Тип соответствующего столбца конвертируется в [Nullable](../../data_types/nullable.md#data_type-nullable), а пустые ячейки заполняются значениями [NULL](../../query_language/syntax.md). 
+- 0 — пустые ячейки заполняются значением по умолчанию соответствующего типа поля. +- 1 — `JOIN` ведет себя как в стандартном SQL. Тип соответствующего поля преобразуется в [Nullable](../../data_types/nullable.md#data_type-nullable), а пустые ячейки заполняются значениями [NULL](../../query_language/syntax.md). **Значение по умолчанию**: 0. @@ -112,10 +136,7 @@ ClickHouse применяет настройку в тех случаях, ко Служит для тех же целей что и `max_block_size`, но задает реккомедуемый размер блоков в байтах, выбирая адаптивное количество строк в блоке. При этом размер блока не может быть более `max_block_size` строк. - -Значение по умолчанию: 1,000,000. Работает только при чтении из MergeTree-движков. - -Отключена по умолчанию (значение 0). Работает только при чтении из MergeTree-движков. +По умолчанию: 1,000,000. Работает только при чтении из MergeTree-движков. ## merge_tree_uniform_read_distribution {#setting-merge_tree_uniform_read_distribution} @@ -449,3 +470,4 @@ ClickHouse применяет настройку в тех случаях, ко - [insert_quorum_timeout](#settings-insert_quorum_timeout) [Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/settings/settings/) + From 2a5b4b3b312f45fb0c10bfef501f4c9d7c6db04c Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 27 Mar 2019 16:37:11 +0300 Subject: [PATCH 47/78] fix ANY JOIN nulls in right key (ALL JOIN is still affected) --- dbms/src/Interpreters/Join.cpp | 66 ++++++++++++------- .../00848_join_use_nulls_segfault.reference | 16 ++--- .../00852_any_join_nulls.reference | 2 + .../0_stateless/00852_any_join_nulls.sql | 19 ++++++ 4 files changed, 72 insertions(+), 31 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00852_any_join_nulls.reference create mode 100644 dbms/tests/queries/0_stateless/00852_any_join_nulls.sql diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 148b92aca7f..db7b9179a77 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -47,6 
+47,39 @@ static std::unordered_map requiredRightKeys(const Names & k return required; } +static void convertColumnToNullable(ColumnWithTypeAndName & column) +{ + if (column.type->isNullable()) + return; + + column.type = makeNullable(column.type); + if (column.column) + column.column = makeNullable(column.column); +} + +/// Converts column to nullable if needed. No backward convertion. +static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable) +{ + if (nullable) + convertColumnToNullable(column); + return std::move(column); +} + +static ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable, const ColumnUInt8 & negative_null_map) +{ + if (nullable) + { + convertColumnToNullable(column); + if (negative_null_map.size()) + { + MutableColumnPtr mutable_column = (*std::move(column.column)).mutate(); + static_cast(*mutable_column).applyNegatedNullMap(negative_null_map); + column.column = std::move(mutable_column); + } + } + return std::move(column); +} + Join::Join(const Names & key_names_right_, bool use_nulls_, const SizeLimits & limits, ASTTableJoin::Kind kind_, ASTTableJoin::Strictness strictness_, bool any_take_last_row_) @@ -213,26 +246,6 @@ size_t Join::getTotalByteCount() const return res; } - -static void convertColumnToNullable(ColumnWithTypeAndName & column) -{ - if (column.type->isNullable()) - return; - - column.type = makeNullable(column.type); - if (column.column) - column.column = makeNullable(column.column); -} - -/// Converts column to nullable if needed. No backward convertion. 
-ColumnWithTypeAndName correctNullability(ColumnWithTypeAndName && column, bool nullable) -{ - if (nullable) - convertColumnToNullable(column); - return std::move(column); -} - - void Join::setSampleBlock(const Block & block) { std::unique_lock lock(rwlock); @@ -721,7 +734,7 @@ void Join::joinBlockImpl( std::unique_ptr offsets_to_replicate; - IColumn::Filter filter = switchJoinRightColumns( + IColumn::Filter row_filter = switchJoinRightColumns( type, maps_, block.rows(), key_columns, key_sizes, added, null_map, offsets_to_replicate); for (size_t i = 0; i < added.size(); ++i) @@ -733,6 +746,12 @@ void Join::joinBlockImpl( if constexpr (STRICTNESS == ASTTableJoin::Strictness::Any) { + /// Some trash to represent IColumn::Filter as ColumnUInt8 needed for ColumnNullable::applyNullMap() + auto null_map_filter_ptr = ColumnUInt8::create(); + ColumnUInt8 & null_map_filter = static_cast(*null_map_filter_ptr); + null_map_filter.getData().swap(row_filter); + const IColumn::Filter & filter = null_map_filter.getData(); + constexpr bool inner_or_right = static_in_v; if constexpr (inner_or_right) { @@ -779,7 +798,7 @@ void Join::joinBlockImpl( } bool is_nullable = use_nulls || it->second->isNullable(); - block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable)); + block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable, null_map_filter)); } } } @@ -808,7 +827,7 @@ void Join::joinBlockImpl( { if (size_t to_insert = (*offsets_to_replicate)[row] - last_offset) { - if (!filter[row]) + if (!row_filter[row]) mut_column->insertDefault(); else for (size_t dup = 0; dup < to_insert; ++dup) @@ -818,6 +837,7 @@ void Join::joinBlockImpl( last_offset = (*offsets_to_replicate)[row]; } + /// TODO: null_map_filter bool is_nullable = (use_nulls && left_or_full) || it->second->isNullable(); block.insert(correctNullability({std::move(mut_column), col.type, right_name}, is_nullable)); } diff --git 
a/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference index f8527a732c5..c2b37fba363 100644 --- a/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference +++ b/dbms/tests/queries/0_stateless/00848_join_use_nulls_segfault.reference @@ -1,13 +1,13 @@ on -l \N String Nullable(String) -l \N String Nullable(String) +l \N \N String Nullable(String) +l \N \N String Nullable(String) r \N String Nullable(String) \N r \N Nullable(String) Nullable(String) l \N String Nullable(String) l \N String Nullable(String) r \N String Nullable(String) \N r \N Nullable(String) Nullable(String) -0 \N +\N \N 0 \N using l \N String Nullable(String) @@ -18,18 +18,18 @@ l \N String Nullable(String) l \N String Nullable(String) \N String Nullable(String) \N \N Nullable(String) Nullable(String) -0 \N +\N \N 0 \N on + join_use_nulls -l \N TODO Nullable(String) -l \N TODO Nullable(String) +l \N \N TODO Nullable(String) +l \N \N TODO Nullable(String) r \N TODO Nullable(String) \N r \N Nullable(String) Nullable(String) l \N TODO Nullable(String) l \N TODO Nullable(String) r \N TODO Nullable(String) \N r \N Nullable(String) Nullable(String) -0 \N +\N \N 0 \N using + join_use_nulls l \N TODO Nullable(String) @@ -40,5 +40,5 @@ l \N TODO Nullable(String) l \N TODO Nullable(String) \N TODO Nullable(String) \N \N Nullable(String) Nullable(String) -0 \N +\N \N 0 \N diff --git a/dbms/tests/queries/0_stateless/00852_any_join_nulls.reference b/dbms/tests/queries/0_stateless/00852_any_join_nulls.reference new file mode 100644 index 00000000000..b0d5371e4f7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00852_any_join_nulls.reference @@ -0,0 +1,2 @@ +1 0 +\N 1 diff --git a/dbms/tests/queries/0_stateless/00852_any_join_nulls.sql b/dbms/tests/queries/0_stateless/00852_any_join_nulls.sql new file mode 100644 index 00000000000..b3f4b332a46 --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/00852_any_join_nulls.sql @@ -0,0 +1,19 @@ +USE test; + +DROP TABLE IF EXISTS table1; +DROP TABLE IF EXISTS table2; +CREATE TABLE table1 ( id String ) ENGINE = Log; +CREATE TABLE table2 ( parent_id String ) ENGINE = Log; + +insert into table1 values ('1'); + +SELECT table2.parent_id = '', isNull(table2.parent_id) +FROM table1 ANY LEFT JOIN table2 ON table1.id = table2.parent_id; + +SET join_use_nulls = 1; + +SELECT table2.parent_id = '', isNull(table2.parent_id) +FROM table1 ANY LEFT JOIN table2 ON table1.id = table2.parent_id; + +DROP TABLE test.table1; +DROP TABLE test.table2; From 7d1be03a161b07b94186f8e2d1c917ceef0abf6b Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 27 Mar 2019 18:42:24 +0300 Subject: [PATCH 48/78] CLICKHOUSE-4386 Version in exceptions --- dbms/src/Common/Exception.cpp | 21 +++++++++++---------- libs/libdaemon/src/BaseDaemon.cpp | 30 ++++++++++++++---------------- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index b7bbc1bf59e..85c01025aed 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -1,17 +1,14 @@ +#include "Exception.h" + #include #include - #include - #include - #include #include #include - -#include #include - +#include namespace DB { @@ -24,6 +21,10 @@ namespace ErrorCodes extern const int CANNOT_TRUNCATE_FILE; } +const char * getVersion() +{ + return VERSION_STRING; +} std::string errnoToString(int code, int e) { @@ -81,13 +82,13 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded } catch (const Exception & e) { - stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); + stream << "(" << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); } catch (const Poco::Exception & e) { try { - stream << "Poco::Exception. 
Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() + stream << "(" << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() << ", e.displayText() = " << e.displayText(); } catch (...) {} @@ -102,7 +103,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); + stream << "(" << getVersion() << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); } catch (...) {} } @@ -116,7 +117,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; + stream << "(" << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; } catch (...) {} } diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index 08bece59b51..d63fb7fb179 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -1,10 +1,8 @@ #include #include #include - #include #include - #include #include #include @@ -15,18 +13,6 @@ #include #include #include - -#if USE_UNWIND - #define UNW_LOCAL_ONLY - #include -#endif - -#ifdef __APPLE__ -// ucontext is not available without _XOPEN_SOURCE -#define _XOPEN_SOURCE -#endif -#include - #include #include #include @@ -68,6 +54,18 @@ #include #include +#if USE_UNWIND + #define UNW_LOCAL_ONLY + #include +#endif + +#ifdef __APPLE__ +// ucontext is not available without _XOPEN_SOURCE +#define _XOPEN_SOURCE +#endif +#include + + /** For transferring information from signal handler to a separate thread. 
* If you need to do something serious in case of a signal (example: write a message to the log), * then sending information to a separate thread through pipe and doing all the stuff asynchronously @@ -301,13 +299,13 @@ private: private: void onTerminate(const std::string & message, ThreadNumber thread_num) const { - LOG_ERROR(log, "(from thread " << thread_num << ") " << message); + LOG_ERROR(log, "(" << VERSION_STRING << ") (from thread " << thread_num << ") " << message); } void onFault(int sig, siginfo_t & info, ucontext_t & context, ThreadNumber thread_num) const { LOG_ERROR(log, "########################################"); - LOG_ERROR(log, "(from thread " << thread_num << ") " + LOG_ERROR(log, "(" << VERSION_STRING << ") (from thread " << thread_num << ") " << "Received signal " << strsignal(sig) << " (" << sig << ")" << "."); void * caller_address = nullptr; From 0fff8a785b3f039d77d6c8e3b6706c19fe1f5b91 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 27 Mar 2019 19:12:47 +0300 Subject: [PATCH 49/78] Add libglibc-compatibility/musl/getentropy.c --- libs/libglibc-compatibility/CMakeLists.txt | 1 + libs/libglibc-compatibility/musl/getentropy.c | 33 +++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 libs/libglibc-compatibility/musl/getentropy.c diff --git a/libs/libglibc-compatibility/CMakeLists.txt b/libs/libglibc-compatibility/CMakeLists.txt index 5bf83b9263f..29d66596386 100644 --- a/libs/libglibc-compatibility/CMakeLists.txt +++ b/libs/libglibc-compatibility/CMakeLists.txt @@ -19,6 +19,7 @@ musl/sched_cpucount.c musl/glob.c musl/exp2f.c musl/pwritev.c +musl/getentropy.c musl/getrandom.c musl/fcntl.c musl/timespec_get.c diff --git a/libs/libglibc-compatibility/musl/getentropy.c b/libs/libglibc-compatibility/musl/getentropy.c new file mode 100644 index 00000000000..825a16ece72 --- /dev/null +++ b/libs/libglibc-compatibility/musl/getentropy.c @@ -0,0 +1,33 @@ +#define _DEFAULT_SOURCE +#include +#include +#include +#include + +int 
getentropy(void *buffer, size_t len) +{ + int cs, ret = 0; + char *pos = buffer; + + if (len > 256) { + errno = EIO; + return -1; + } + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs); + + while (len) { + ret = getrandom(pos, len, 0); + if (ret < 0) { + if (errno == EINTR) continue; + else break; + } + pos += ret; + len -= ret; + ret = 0; + } + + pthread_setcancelstate(cs, 0); + + return ret; +} From 7bcf8ec3440829304945c3ac85dc623bc20cd494 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 27 Mar 2019 18:05:05 +0300 Subject: [PATCH 50/78] Fix install in shared mode --- dbms/programs/CMakeLists.txt | 1 + dbms/programs/clickhouse-split-helper | 6 ++++++ debian/control | 4 ++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 6015f1c1b75..b6928652801 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -93,6 +93,7 @@ if (CLICKHOUSE_ONE_SHARED) target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_PERFORMANCE_TEST_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_COMPILER_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK}) target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_PERFORMANCE_TEST_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_COMPILER_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") + install (TARGETS clickhouse-lib LIBRARY DESTINATION 
${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) endif() if (CLICKHOUSE_SPLIT_BINARY) diff --git a/dbms/programs/clickhouse-split-helper b/dbms/programs/clickhouse-split-helper index 2495160e02a..14a86f76097 100755 --- a/dbms/programs/clickhouse-split-helper +++ b/dbms/programs/clickhouse-split-helper @@ -1,5 +1,11 @@ #!/bin/sh +# Helper for split build mode. +# Allows to run commands like +# clickhouse client +# clickhouse server +# ... + set -e CMD=$1 shift diff --git a/debian/control b/debian/control index 493ed628f98..78ea9069731 100644 --- a/debian/control +++ b/debian/control @@ -28,8 +28,8 @@ Package: clickhouse-common-static Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata Suggests: clickhouse-common-static-dbg -Replaces: clickhouse-server-base -Provides: clickhouse-server-base +Replaces: clickhouse-common, clickhouse-server-base +Provides: clickhouse-common, clickhouse-server-base Description: Common files for ClickHouse Yandex ClickHouse is a column-oriented database management system that allows generating analytical data reports in real time. 
From 4c71f1ba4b837ae7d8da4bea61d601140fbc95c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 27 Mar 2019 21:58:08 +0300 Subject: [PATCH 51/78] Updated tests, part 2 --- ...cated_merge_tree_alter_zookeeper.reference | 48 +++++++++---------- .../00643_cast_zookeeper.reference | 2 +- .../00753_comment_columns_zookeeper.reference | 4 +- ...fy_order_by_replicated_zookeeper.reference | 4 +- ...dices_alter_replicated_zookeeper.reference | 24 +++++----- ...om_compression_codecs_replicated.reference | 2 +- ...r_custom_compression_codecs_replicated.sql | 3 +- 7 files changed, 44 insertions(+), 43 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference b/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference index 4d45c186220..931a9c029e2 100644 --- a/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00062_replicated_merge_tree_alter_zookeeper.reference @@ -1,22 +1,22 @@ d Date k UInt64 i32 Int32 -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 10 42 d Date k UInt64 i32 Int32 dt DateTime -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` 
Int32, `dt` DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 9 41 1992-01-01 08:00:00 2015-01-01 10 42 0000-00-00 00:00:00 d Date @@ -25,14 +25,14 @@ i32 Int32 dt DateTime n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime n.ui8 Array(UInt8) n.s Array(String) -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] 2015-01-01 10 42 0000-00-00 00:00:00 [] [] @@ -43,7 +43,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', 
\'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -51,7 +51,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 9 41 1992-01-01 08:00:00 [] [] [] @@ -64,7 +64,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), `s` String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -73,7 +73,7 @@ n.ui8 Array(UInt8) n.s Array(String) n.d Array(Date) s String DEFAULT \'0\' -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), s String DEFAULT \'0\') ENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `n.d` Array(Date), `s` String DEFAULT \'0\') ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['0000-00-00','0000-00-00','0000-00-00'] 0 @@ -86,7 +86,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) s Int64 -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` Int64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -94,7 +94,7 @@ dt DateTime n.ui8 Array(UInt8) n.s Array(String) s Int64 -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s Int64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` Int64) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 @@ -108,7 +108,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 
n.d Array(Date) -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` UInt32, `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 @@ -117,7 +117,7 @@ n.ui8 Array(UInt8) n.s Array(String) s UInt32 n.d Array(Date) -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), s UInt32, `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.ui8` Array(UInt8), `n.s` Array(String), `s` UInt32, `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 ['0000-00-00','0000-00-00','0000-00-00'] 2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 ['0000-00-00','0000-00-00','0000-00-00'] @@ -129,14 +129,14 @@ i32 Int32 dt DateTime n.s Array(String) s UInt32 -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, `n.s` Array(String), s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.s` Array(String), `s` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime n.s Array(String) s UInt32 -CREATE TABLE test.replicated_alter2 ( d 
Date, k UInt64, i32 Int32, dt DateTime, `n.s` Array(String), s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `n.s` Array(String), `s` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 ['asd','qwe','qwe'] 100500 2015-01-01 7 39 2014-07-14 13:26:50 ['120','130','140'] 0 2015-01-01 8 40 2012-12-12 12:12:12 ['12','13','14'] 0 @@ -147,13 +147,13 @@ k UInt64 i32 Int32 dt DateTime s UInt32 -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime s UInt32 -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 @@ -166,7 +166,7 @@ dt DateTime s UInt32 n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d 
Date k UInt64 i32 Int32 @@ -174,7 +174,7 @@ dt DateTime s UInt32 n.s Array(String) n.d Array(Date) -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32, `n.s` Array(String), `n.d` Array(Date)) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 [] [] 2015-01-01 7 39 2014-07-14 13:26:50 0 [] [] 2015-01-01 8 40 2012-12-12 12:12:12 0 [] [] @@ -185,13 +185,13 @@ k UInt64 i32 Int32 dt DateTime s UInt32 -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt DateTime s UInt32 -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt DateTime, s UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` DateTime, `s` UInt32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 13:26:50 100500 2015-01-01 7 39 2014-07-14 13:26:50 0 2015-01-01 8 40 2012-12-12 12:12:12 0 @@ -202,13 +202,13 @@ k UInt64 i32 Int32 dt Date s DateTime -CREATE TABLE test.replicated_alter1 ( d Date, k UInt64, i32 Int32, dt Date, s DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) +CREATE TABLE test.replicated_alter1 (`d` Date, `k` UInt64, `i32` Int32, `dt` Date, `s` DateTime) ENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r1\', d, k, 8192) d Date k UInt64 i32 Int32 dt Date s DateTime -CREATE TABLE test.replicated_alter2 ( d Date, k UInt64, i32 Int32, dt Date, s DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) +CREATE TABLE test.replicated_alter2 (`d` Date, `k` UInt64, `i32` Int32, `dt` Date, `s` DateTime) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/alter\', \'r2\', d, k, 8192) 2015-01-01 6 38 2014-07-15 1970-01-02 06:55:00 2015-01-01 7 39 2014-07-14 0000-00-00 00:00:00 2015-01-01 8 40 2012-12-12 0000-00-00 00:00:00 diff --git a/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference b/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference index abdca87a873..27f2760430c 100644 --- a/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00643_cast_zookeeper.reference @@ -1,4 +1,4 @@ -CREATE TABLE test.cast1 ( x UInt8, e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_cast', 'r1') ORDER BY e SETTINGS index_granularity = 8192 +CREATE TABLE test.cast1 (`x` UInt8, `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_cast', 'r1') ORDER BY e SETTINGS index_granularity = 8192 x UInt8 e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference b/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference index 117ce8750b5..178f2142b9f 100644 --- a/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00753_comment_columns_zookeeper.reference @@ -1,6 +1,6 @@ -CREATE TABLE test.check_comments ( column_name1 UInt8 DEFAULT 1 COMMENT 
\'comment\', column_name2 UInt8 COMMENT \'non default comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 +CREATE TABLE test.check_comments (`column_name1` UInt8 DEFAULT 1 COMMENT \'comment\', `column_name2` UInt8 COMMENT \'non default comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 column_name1 UInt8 DEFAULT 1 comment column_name2 UInt8 non default comment -CREATE TABLE test.check_comments ( column_name1 UInt8 DEFAULT 1 COMMENT \'another comment\', column_name2 UInt8 COMMENT \'non default comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 +CREATE TABLE test.check_comments (`column_name1` UInt8 DEFAULT 1 COMMENT \'another comment\', `column_name2` UInt8 COMMENT \'non default comment\') ENGINE = ReplicatedMergeTree(\'clickhouse/tables/test_comments\', \'r1\') ORDER BY column_name1 SETTINGS index_granularity = 8192 column_name1 UInt8 DEFAULT 1 another comment column_name2 UInt8 non default comment diff --git a/dbms/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference index 5faefa91056..ebe30941f3f 100644 --- a/dbms/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00754_alter_modify_order_by_replicated_zookeeper.reference @@ -9,6 +9,6 @@ 1 2 1 30 1 2 4 90 *** Check SHOW CREATE TABLE *** -CREATE TABLE test.summing_r2 ( x UInt32, y UInt32, z UInt32, val UInt32) ENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\') PRIMARY KEY (x, y) ORDER BY (x, y, -z) SETTINGS index_granularity = 8192 +CREATE TABLE test.summing_r2 (`x` UInt32, `y` UInt32, `z` UInt32, `val` UInt32) ENGINE = 
ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\') PRIMARY KEY (x, y) ORDER BY (x, y, -z) SETTINGS index_granularity = 8192 *** Check SHOW CREATE TABLE after offline ALTER *** -CREATE TABLE test.summing_r2 ( x UInt32, y UInt32, z UInt32, t UInt32, val UInt32) ENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\') PRIMARY KEY (x, y) ORDER BY (x, y, t * t) SETTINGS index_granularity = 8192 +CREATE TABLE test.summing_r2 (`x` UInt32, `y` UInt32, `z` UInt32, `t` UInt32, `val` UInt32) ENGINE = ReplicatedSummingMergeTree(\'/clickhouse/tables/test/summing\', \'r2\') PRIMARY KEY (x, y) ORDER BY (x, y, t * t) SETTINGS index_granularity = 8192 diff --git a/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference b/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference index ce3192788e1..b2c2b41f460 100644 --- a/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00836_indices_alter_replicated_zookeeper.reference @@ -1,5 +1,5 @@ -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = 
ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 2 1 2 @@ -14,8 +14,8 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 T 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx3 u64 - i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 + i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -28,10 +28,10 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx3 u64 - i32 T 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS 
index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter1\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 4 1 5 @@ -44,14 +44,14 @@ CREATE TABLE test.minmax_idx_r ( u64 UInt64, i32 Int32, INDEX idx1 u64 * i32 T 3 2 19 9 65 75 -CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32, INDEX idx1 
u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2 (`u64` UInt64, `i32` Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r (`u64` UInt64, `i32` Int32, INDEX idx1 u64 + i32 TYPE minmax GRANULARITY 10, INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 10) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 3 1 2 1 3 -CREATE TABLE test.minmax_idx2 ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 -CREATE TABLE test.minmax_idx2_r ( u64 UInt64, i32 Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2 (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r1\') ORDER BY u64 SETTINGS index_granularity = 8192 +CREATE TABLE test.minmax_idx2_r (`u64` UInt64, `i32` Int32) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/indices_alter2\', \'r2\') ORDER BY u64 SETTINGS index_granularity = 8192 1 2 1 3 1 2 diff --git a/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference b/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference index f178ce23938..29bda49a8e5 100644 --- a/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference +++ 
b/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.reference @@ -20,7 +20,7 @@ 274972506.6 9175437371954010821 9175437371954010821 -CREATE TABLE test.compression_codec_multiple_more_types_replicated ( id Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), data FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/compression_codec_multiple_more_types_replicated\', \'1\') ORDER BY tuple() SETTINGS index_granularity = 8192 +CREATE TABLE test.compression_codec_multiple_more_types_replicated (`id` Decimal(38, 13) CODEC(ZSTD(1), LZ4, ZSTD(1), ZSTD(1), Delta(2), Delta(4), Delta(1), LZ4HC(0)), `data` FixedString(12) CODEC(ZSTD(1), ZSTD(1), Delta(1), Delta(1), Delta(1), NONE, NONE, NONE, LZ4HC(0)), `ddd.age` Array(UInt8) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8)), `ddd.Name` Array(String) CODEC(LZ4, LZ4HC(0), NONE, NONE, NONE, ZSTD(1), Delta(8))) ENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test/compression_codec_multiple_more_types_replicated\', \'1\') ORDER BY tuple() SETTINGS index_granularity = 8192 1.5555555555555 hello world! [77] ['John'] 7.1000000000000 xxxxxxxxxxxx [127] ['Henry'] ! 
diff --git a/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.sql b/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.sql index 6107b3156af..d3bbbacd2a6 100644 --- a/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.sql +++ b/dbms/tests/queries/0_stateless/00910_zookeeper_custom_compression_codecs_replicated.sql @@ -1,6 +1,7 @@ SET send_logs_level = 'none'; -DROP TABLE IF EXISTS test.compression_codec_replicated; +DROP TABLE IF EXISTS test.compression_codec_replicated1; +DROP TABLE IF EXISTS test.compression_codec_replicated2; CREATE TABLE test.compression_codec_replicated1( id UInt64 CODEC(LZ4), From 5f99f44768524b4523c1cbb921306e21f2cc2b3a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 27 Mar 2019 22:12:53 +0300 Subject: [PATCH 52/78] Update CHANGELOG_RU.md --- CHANGELOG_RU.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG_RU.md b/CHANGELOG_RU.md index 99a0a714784..743edeb7eeb 100644 --- a/CHANGELOG_RU.md +++ b/CHANGELOG_RU.md @@ -26,7 +26,7 @@ * Исправлено undefined behaviour в функции `dictIsIn` для словарей типа `cache`. [#4515](https://github.com/yandex/ClickHouse/pull/4515) ([alesapin](https://github.com/alesapin)) * Исправлен deadlock в случае, если запрос SELECT блокирует одну и ту же таблицу несколько раз (например - из разных потоков, либо при выполнении разных подзапросов) и одновременно с этим производится DDL запрос. [#4535](https://github.com/yandex/ClickHouse/pull/4535) ([Alex Zatelepin](https://github.com/ztlpn)) * Настройка `compile_expressions` выключена по-умолчанию до тех пор, пока мы не зафиксируем исходники используемой библиотеки `LLVM` и не будем проверять её под `ASan` (сейчас библиотека LLVM берётся из системы). 
[#4579](https://github.com/yandex/ClickHouse/pull/4579) ([alesapin](https://github.com/alesapin)) -* Исправлено падение по `std::terminate`, если `invalidate_query` для внешних словарей с истоником `clickhouse` вернул неправильный результат (пустой; более чем одну строку; более чем один столбец). Исправлена ошибка, из-за которой запрос `invalidate_query` производился каждые пять секунд, независимо от указанного `lifetime`. [#4583](https://github.com/yandex/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov)) +* Исправлено падение по `std::terminate`, если `invalidate_query` для внешних словарей с источником `clickhouse` вернул неправильный результат (пустой; более чем одну строку; более чем один столбец). Исправлена ошибка, из-за которой запрос `invalidate_query` производился каждые пять секунд, независимо от указанного `lifetime`. [#4583](https://github.com/yandex/ClickHouse/pull/4583) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Исправлен deadlock в случае, если запрос `invalidate_query` для внешнего словаря с источником `clickhouse` использовал таблицу `system.dictionaries` или базу данных типа `Dictionary` (редкий случай). [#4599](https://github.com/yandex/ClickHouse/pull/4599) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Исправлена работа CROSS JOIN с пустым WHERE [#4598](https://github.com/yandex/ClickHouse/pull/4598) ([Artem Zuikov](https://github.com/4ertus2)) * Исправлен segfault в функции `replicate` с константным аргументом. 
[#4603](https://github.com/yandex/ClickHouse/pull/4603) ([alexey-milovidov](https://github.com/alexey-milovidov)) From faf9462c372b100f1e54e4ed0878e1e01b8ccc8f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 27 Mar 2019 22:25:58 +0300 Subject: [PATCH 53/78] Added support for constant-constant case in multi-search functions --- dbms/src/Functions/FunctionsStringSearch.h | 4 ++++ .../0_stateless/00928_multi_match_constant_constant.reference | 3 +++ .../0_stateless/00928_multi_match_constant_constant.sql | 3 +++ 3 files changed, 10 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.reference create mode 100644 dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.sql diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 89f51470da8..1f9544e4513 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -210,6 +210,8 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { @@ -299,6 +301,8 @@ public: String getName() const override { return name; } size_t getNumberOfArguments() const override { return 2; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { diff --git a/dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.reference b/dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.reference new file mode 100644 index 00000000000..cc2b1466fcb --- /dev/null +++ 
b/dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.reference @@ -0,0 +1,3 @@ +1 +2 +[1,8] diff --git a/dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.sql b/dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.sql new file mode 100644 index 00000000000..fc3e8ca6b2c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00928_multi_match_constant_constant.sql @@ -0,0 +1,3 @@ +SELECT multiMatchAny('goodbye', ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']); +SELECT multiMatchAnyIndex('goodbye', ['^hello[, ]+world$', 'go+d *bye', 'w(or)+ld']); +SELECT multiSearchAllPositions('hello, world', ['hello', 'world']); From 2fd83f06f570759f31e7b74b5e34655eaeefe6ff Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 28 Mar 2019 00:31:08 +0300 Subject: [PATCH 54/78] Update float.md --- docs/en/data_types/float.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/data_types/float.md b/docs/en/data_types/float.md index 1c908819c1f..7c731d9fbc8 100644 --- a/docs/en/data_types/float.md +++ b/docs/en/data_types/float.md @@ -25,7 +25,7 @@ SELECT 1 - 0.9 - The result of the calculation depends on the calculation method (the processor type and architecture of the computer system). - Floating-point calculations might result in numbers such as infinity (`Inf`) and "not-a-number" (`NaN`). This should be taken into account when processing the results of calculations. -- When reading floating point numbers from rows, the result might not be the nearest machine-representable number. +- When parsing floating point numbers from text, the result might not be the nearest machine-representable number. 
## NaN and Inf From da68dd029700dbf00837d730db31128a9facae52 Mon Sep 17 00:00:00 2001 From: Gary Dotzler <47778004+geldot@users.noreply.github.com> Date: Thu, 28 Mar 2019 09:15:57 +0000 Subject: [PATCH 55/78] Update Buffer docs for PREWHERE PREWHERE support was added to the Buffer engine in #4671 --- docs/en/operations/table_engines/buffer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/table_engines/buffer.md b/docs/en/operations/table_engines/buffer.md index 7f6a17223be..d8e240b8013 100644 --- a/docs/en/operations/table_engines/buffer.md +++ b/docs/en/operations/table_engines/buffer.md @@ -38,7 +38,7 @@ If you need to run ALTER for a subordinate table and the Buffer table, we recomm If the server is restarted abnormally, the data in the buffer is lost. -PREWHERE, FINAL and SAMPLE do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. Because of this, we recommend only using the Buffer table for writing, while reading from the destination table. +FINAL and SAMPLE do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table. When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table. 
From eb4cc81176d35f56d1e454eb9ba0bbb8dbfb1a73 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 28 Mar 2019 12:29:48 +0300 Subject: [PATCH 56/78] Add llvm-8 to stress test docker image --- docker/test/stress/Dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index f4f4b71ddcd..6a53b8e8203 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -17,7 +17,8 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet + telnet \ + llvm-8 COPY ./stress /stress COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml @@ -28,7 +29,9 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ dpkg -i package_folder/clickhouse-client_*.deb; \ dpkg -i package_folder/clickhouse-test_*.deb; \ + ln -s /usr/lib/llvm-8/bin/llvm-symbolizer /usr/bin/llvm-symbolizer; \ echo "TSAN_OPTIONS='halt_on_error=1'" >> /etc/environment; \ + echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-8/bin/llvm-symbolizer" >> /etc/environment; \ echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \ echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \ echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-6.0/bin/llvm-symbolizer" >> /etc/environment; \ From 80cc4dc88ce2482c5ead318fa514b18b079bfd62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 28 Mar 2019 13:34:05 +0300 Subject: [PATCH 57/78] Save unit tests binary after binary build --- docker/packager/binary/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index 25a6c844cd2..5b32abfb7ef 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -34,4 +34,4 @@ RUN apt-get update -y \ git \ tzdata -CMD mkdir -p build/build_result && cd build/build_result && cmake .. 
-DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS && ninja && mv ./dbms/programs/clickhouse* /output +CMD mkdir -p build/build_result && cd build/build_result && cmake .. -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS && ninja && mv ./dbms/programs/clickhouse* /output && mv ./dbms/unit_tests_dbms /output From cbffebc22c9e567bedbf41a915bebfa681c1e4a7 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 28 Mar 2019 16:08:38 +0300 Subject: [PATCH 58/78] Print with version --- dbms/src/Common/Exception.cpp | 8 ++++---- dbms/tests/queries/0_stateless/00834_kill_mutation.sh | 5 +++-- .../00834_kill_mutation_replicated_zookeeper.sh | 4 ++-- libs/libdaemon/src/BaseDaemon.cpp | 4 ++-- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dbms/src/Common/Exception.cpp b/dbms/src/Common/Exception.cpp index 85c01025aed..a9197d6bc7d 100644 --- a/dbms/src/Common/Exception.cpp +++ b/dbms/src/Common/Exception.cpp @@ -82,13 +82,13 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded } catch (const Exception & e) { - stream << "(" << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); + stream << "(version " << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace); } catch (const Poco::Exception & e) { try { - stream << "(" << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() + stream << "(version " << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code() << ", e.displayText() = " << e.displayText(); } catch (...) {} @@ -103,7 +103,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "(" << getVersion() << ") " << "std::exception. 
Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); + stream << "(version " << getVersion() << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what(); } catch (...) {} } @@ -117,7 +117,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded if (status) name += " (demangling status: " + toString(status) + ")"; - stream << "(" << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; + stream << "(version " << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name; } catch (...) {} } diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh index d70963db8e2..cb48140a368 100755 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation.sh @@ -17,7 +17,8 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill a single invalid mutat ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE toUInt32(s) = 1" sleep 0.1 -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation'" + +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. 
', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation'" @@ -29,7 +30,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill invalid mutation that ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE toUInt32(s) = 1" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation DELETE WHERE x = 1" -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_1_1_0', '20010101_2_2_0'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. ', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" sleep 0.1 ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation' AND mutation_id = 'mutation_4.txt'" diff --git a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh index dfaa85f2f2b..5fbc3f061d3 100755 --- a/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00834_kill_mutation_replicated_zookeeper.sh @@ -20,7 +20,7 @@ ${CLICKHOUSE_CLIENT} --query="SELECT '*** Create and kill a single invalid mutat ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE toUInt32(s) = 1" sleep 1 -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0', '20010101_0_0_0'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 
'kill_mutation_r1'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0', '20010101_0_0_0'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. ', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1'" @@ -34,7 +34,7 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE toU ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.kill_mutation_r1 DELETE WHERE x = 1" sleep 1 -${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0_1', '20010101_0_0_0_1'), latest_fail_time != 0, substr(latest_fail_reason, 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, latest_failed_part IN ('20000101_0_0_0_1', '20010101_0_0_0_1'), latest_fail_time != 0, substr(replaceRegexpOne(latest_fail_reason, '.version [0-9.]+. 
', ''), 1, 8) FROM system.mutations WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" ${CLICKHOUSE_CLIENT} --query="KILL MUTATION WHERE database = 'test' AND table = 'kill_mutation_r1' AND mutation_id = '0000000001'" diff --git a/libs/libdaemon/src/BaseDaemon.cpp b/libs/libdaemon/src/BaseDaemon.cpp index d63fb7fb179..c61f74d54d9 100644 --- a/libs/libdaemon/src/BaseDaemon.cpp +++ b/libs/libdaemon/src/BaseDaemon.cpp @@ -299,13 +299,13 @@ private: private: void onTerminate(const std::string & message, ThreadNumber thread_num) const { - LOG_ERROR(log, "(" << VERSION_STRING << ") (from thread " << thread_num << ") " << message); + LOG_ERROR(log, "(version " << VERSION_STRING << ") (from thread " << thread_num << ") " << message); } void onFault(int sig, siginfo_t & info, ucontext_t & context, ThreadNumber thread_num) const { LOG_ERROR(log, "########################################"); - LOG_ERROR(log, "(" << VERSION_STRING << ") (from thread " << thread_num << ") " + LOG_ERROR(log, "(version " << VERSION_STRING << ") (from thread " << thread_num << ") " << "Received signal " << strsignal(sig) << " (" << sig << ")" << "."); void * caller_address = nullptr; From e4d45b7e50951e0e6fec50f6925e395def85d995 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 28 Mar 2019 16:10:42 +0300 Subject: [PATCH 59/78] Fix test include --- dbms/src/IO/tests/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/IO/tests/CMakeLists.txt b/dbms/src/IO/tests/CMakeLists.txt index c0e8e0527b3..127dc45d9bb 100644 --- a/dbms/src/IO/tests/CMakeLists.txt +++ b/dbms/src/IO/tests/CMakeLists.txt @@ -79,3 +79,6 @@ target_link_libraries (parse_date_time_best_effort PRIVATE clickhouse_common_io) add_executable (zlib_ng_bug zlib_ng_bug.cpp) target_link_libraries (zlib_ng_bug PRIVATE ${Poco_Foundation_LIBRARY}) +if(NOT USE_INTERNAL_POCO_LIBRARY) + target_include_directories(zlib_ng_bug SYSTEM BEFORE PRIVATE ${Poco_INCLUDE_DIRS}) +endif() From 
0ac9d5a7c3a94b82db207bd51c3f7abee48967bc Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 28 Mar 2019 13:36:49 +0300 Subject: [PATCH 60/78] Try use CMAKE_LINK_DEPENDS_NO_SHARED --- CMakeLists.txt | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 750a3686c1d..b5f2a88f702 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,11 @@ -project (ClickHouse) -cmake_minimum_required (VERSION 3.3) +project(ClickHouse) +cmake_minimum_required(VERSION 3.3) cmake_policy(SET CMP0023 NEW) - set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") +set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json +set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so +set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) +set(CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs. option(ENABLE_IPO "Enable inter-procedural optimization (aka LTO)" OFF) # need cmake 3.9+ if(ENABLE_IPO) @@ -38,9 +41,6 @@ if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURC message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive") endif () -# Write compile_commands.json -set(CMAKE_EXPORT_COMPILE_COMMANDS 1) - include (cmake/find_ccache.cmake) if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None") @@ -50,8 +50,6 @@ endif () string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) message (STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") -set (CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) -set (CMAKE_DEBUG_POSTFIX "d" CACHE STRING "Generate debug library name with a postfix.") # To be consistent with CMakeLists from contrib libs. 
option (USE_STATIC_LIBRARIES "Set to FALSE to use shared libraries" ON) option (MAKE_STATIC_LIBRARIES "Set to FALSE to make shared libraries" ${USE_STATIC_LIBRARIES}) From 7412bdb4be2fbe2a7f0097ca605a948bbb1d86bb Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Thu, 28 Mar 2019 17:43:19 +0300 Subject: [PATCH 61/78] Update buffer.md --- docs/ru/operations/table_engines/buffer.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/table_engines/buffer.md b/docs/ru/operations/table_engines/buffer.md index 24e456da486..abc20b6a88c 100644 --- a/docs/ru/operations/table_engines/buffer.md +++ b/docs/ru/operations/table_engines/buffer.md @@ -44,7 +44,7 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны. -Для таблиц типа Buffer неправильно работают PREWHERE, FINAL и SAMPLE. Эти условия пробрасываются в таблицу назначения, но не используются для обработки данных в буфере. В связи с этим, рекомендуется использовать таблицу типа Buffer только для записи, а читать из таблицы назначения. +Для таблиц типа Buffer неправильно работают FINAL и SAMPLE. Эти условия пробрасываются в таблицу назначения, но не используются для обработки данных в буфере. В связи с этим, рекомендуется использовать таблицу типа Buffer только для записи, а читать из таблицы назначения. При добавлении данных в Buffer, один из буферов блокируется. Это приводит к задержкам, если одновременно делается чтение из таблицы. 
From cae4a426c5b068e27b03ef93ad4bb92532b7f66f Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 28 Mar 2019 17:59:25 +0300 Subject: [PATCH 62/78] Hyperscan restriction --- dbms/src/Functions/FunctionsStringSearch.cpp | 10 +++++++--- .../functions/string_search_functions.md | 2 +- .../functions/string_search_functions.md | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index a7096786dc8..8f0e11b5e81 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -33,6 +33,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int ILLEGAL_COLUMN; + extern const int TOO_MANY_BYTES; } @@ -646,14 +647,17 @@ struct MultiMatchAnyImpl return 0; }; const size_t haystack_offsets_size = haystack_offsets.size(); - size_t offset = 0; + UInt64 offset = 0; for (size_t i = 0; i < haystack_offsets_size; ++i) { + UInt64 length = haystack_offsets[i] - offset - 1; + if (length >= std::numeric_limits::max()) + throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES); res[i] = 0; hs_scan( hyperscan_regex->get(), reinterpret_cast(haystack_data.data()) + offset, - haystack_offsets[i] - offset - 1, + length, 0, smart_scratch.get(), on_match, @@ -661,7 +665,7 @@ struct MultiMatchAnyImpl offset = haystack_offsets[i]; } #else - /// Fallback if not an intel processor + /// Fallback if do not use hyperscan PaddedPODArray accum(res.size()); memset(res.data(), 0, res.size() * sizeof(res.front())); memset(accum.data(), 0, accum.size()); diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index eaa42610707..509a0512096 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -54,7 +54,7 @@ For patterns to search for substrings in a 
string, it is better to use LIKE or ' The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. It uses [hyperscan](https://github.com/intel/hyperscan) algorithm. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster. -Note: this function is in experimental mode because of some [issues](https://github.com/intel/hyperscan/issues/141). +Note: this function is in experimental mode because of some [issues](https://github.com/intel/hyperscan/issues/141). The length of any of the `haystack` string must be less than 2<sup>32</sup> bytes otherwise the exception is thrown. This restriction takes place because of hyperscan API. ## multiMatchAnyIndex(haystack, [pattern_1, pattern_2, ..., pattern_n]) diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 7607d9124a2..678dd018f38 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -47,7 +47,7 @@ То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется алгоритм [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. -Примечание: эта функция находится пока в экспериментальном режиме из-за некоторых [проблем](https://github.com/intel/hyperscan/issues/141). +Примечание: эта функция находится пока в экспериментальном режиме из-за некоторых [проблем](https://github.com/intel/hyperscan/issues/141). Длина любой строки из `haystack` должна быть меньше 2<sup>32</sup> байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. 
## multiMatchAnyIndex(haystack, [pattern_1, pattern_2, ..., pattern_n]) From a791e3155b318000c2f1526e54ca957a3f96ba6e Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 28 Mar 2019 18:12:37 +0300 Subject: [PATCH 63/78] More restrictions added --- docs/en/query_language/functions/string_search_functions.md | 4 +++- docs/ru/query_language/functions/string_search_functions.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 509a0512096..183ab93431b 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -39,6 +39,8 @@ Returns 1, if at least one string `needle_i` matches the string `haystack` and 0 For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. +Note: in all `multiSearch*` functions the number of needles should be less than 2<sub>8</sub> because of implementation specification. + ## match(haystack, pattern) Checks whether the string matches the `pattern` regular expression. A `re2` regular expression. The [syntax](https://github.com/google/re2/wiki/Syntax) of the `re2` regular expressions is more limited than the syntax of the Perl regular expressions. @@ -52,7 +54,7 @@ For patterns to search for substrings in a string, it is better to use LIKE or ' ## multiMatchAny(haystack, [pattern_1, pattern_2, ..., pattern_n]) -The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. It uses [hyperscan](https://github.com/intel/hyperscan) algorithm. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster. +The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. 
It uses [hyperscan](https://github.com/intel/hyperscan) library. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster. Note: this function is in experimental mode because of some [issues](https://github.com/intel/hyperscan/issues/141). The length of any of the `haystack` string must be less than 2<sup>32</sup> bytes otherwise the exception is thrown. This restriction takes place because of hyperscan API. diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 678dd018f38..874553315b6 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -34,6 +34,8 @@ Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. +Примечание: во всех функциях `multiSearch*` количество needles должно быть меньше 2<sub>8</sub> из-за внутренностей реализации. + ## match(haystack, pattern) Проверка строки на соответствие регулярному выражению pattern. Регулярное выражение **re2**. Синтаксис регулярных выражений **re2** является более ограниченным по сравнению с регулярными выражениями **Perl** ([подробнее](https://github.com/google/re2/wiki/Syntax)). Возвращает 0 (если не соответствует) или 1 (если соответствует). @@ -45,7 +47,7 @@ ## multiMatchAny(haystack, [pattern_1, pattern_2, ..., pattern_n]) -То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется алгоритм [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. +То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. 
Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. Примечание: эта функция находится пока в экспериментальном режиме из-за некоторых [проблем](https://github.com/intel/hyperscan/issues/141). Длина любой строки из `haystack` должна быть меньше 2<sup>32</sup> байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. From fa598d427d25e45244a3585db108b20ce6331a40 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 19:28:54 +0300 Subject: [PATCH 64/78] Fixed error --- dbms/src/DataStreams/ColumnGathererStream.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/ColumnGathererStream.cpp b/dbms/src/DataStreams/ColumnGathererStream.cpp index 53466599617..6e624ed971a 100644 --- a/dbms/src/DataStreams/ColumnGathererStream.cpp +++ b/dbms/src/DataStreams/ColumnGathererStream.cpp @@ -61,8 +61,9 @@ Block ColumnGathererStream::readImpl() return Block(); MutableColumnPtr output_column = column.column->cloneEmpty(); + output_block = Block{column.cloneEmpty()}; output_column->gather(*this); - output_block = Block{{std::move(output_column), column.type, column.name}}; + output_block.getByPosition(0).column = std::move(output_column); return output_block; } From 70a7f4805db460bf5cc4da70637a8f4518047292 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 28 Mar 2019 19:41:11 +0300 Subject: [PATCH 65/78] Test to the restriction added, fix bug --- dbms/src/Functions/FunctionsStringSearch.h | 26 +++--- .../00233_position_function_family.reference | 4 + .../00233_position_function_family.sql | 85 +++++++++++++++++++ 3 files changed, 101 insertions(+), 14 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 1f9544e4513..2d7d0546031 100644 --- 
a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -14,7 +14,6 @@ #include #include - namespace DB { /** Search and replace functions in strings: @@ -215,12 +214,6 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.size() + 1 >= std::numeric_limits::max()) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) - + ", should be at most 255.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isString(arguments[0])) throw Exception( "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -230,7 +223,6 @@ public: throw Exception( "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(std::make_shared()); } @@ -252,6 +244,12 @@ public: Array src_arr = col_const_arr->getValue(); + if (src_arr.size() > std::numeric_limits::max()) + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) + + ", should be at most 255", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + std::vector refs; for (const auto & el : src_arr) refs.emplace_back(el.get()); @@ -306,12 +304,6 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { - if (arguments.size() + 1 >= LimitArgs) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) - + ", should be at most " + std::to_string(LimitArgs) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (!isString(arguments[0])) throw Exception( "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); @@ -343,6 +335,12 @@ public: Array src_arr = 
col_const_arr->getValue(); + if (src_arr.size() > LimitArgs) + throw Exception( + "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) + + ", should be at most " + std::to_string(LimitArgs), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + std::vector refs; refs.reserve(src_arr.size()); diff --git a/dbms/tests/queries/0_stateless/00233_position_function_family.reference b/dbms/tests/queries/0_stateless/00233_position_function_family.reference index 0c1301423d1..114d498474f 100644 --- a/dbms/tests/queries/0_stateless/00233_position_function_family.reference +++ b/dbms/tests/queries/0_stateless/00233_position_function_family.reference @@ -23432,3 +23432,7 @@ 1 1 1 +1 +1 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00233_position_function_family.sql b/dbms/tests/queries/0_stateless/00233_position_function_family.sql index d0244ebff13..a7a78d8965e 100644 --- a/dbms/tests/queries/0_stateless/00233_position_function_family.sql +++ b/dbms/tests/queries/0_stateless/00233_position_function_family.sql @@ -1,3 +1,4 @@ +SET send_logs_level = 'none'; select 1 = position('', ''); select 1 = position('abc', ''); select 0 = position('', 'abc'); @@ -1462,3 +1463,87 @@ select 0 = multiSearchAny(materialize('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab', 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab', 'b']); + +-- 254 +select +[ +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 +] = +multiSearchAllPositions(materialize('string'), +['o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'str']); + +select 254 = multiSearchFirstIndex(materialize('string'), +['o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 
'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'str']); + + +select +[ +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 +] = +multiSearchAllPositions(materialize('string'), +['o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 
'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'str']); + +select 255 = multiSearchFirstIndex(materialize('string'), +['o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 
'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'str']); + +select multiSearchAllPositions(materialize('string'), +['o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'str']); -- { serverError 42 } + +select multiSearchFirstIndex(materialize('string'), +['o', 'o', 'o', 'o', 'o', 
'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', +'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'o', 'str']); -- { serverError 42 } From c8bc4d6e26b2cc379d834ce36179077fb05cf13b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 19:48:14 +0300 Subject: [PATCH 66/78] Fixed error --- dbms/src/DataStreams/ColumnGathererStream.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/ColumnGathererStream.cpp b/dbms/src/DataStreams/ColumnGathererStream.cpp index 6e624ed971a..c7dd5c86509 100644 --- a/dbms/src/DataStreams/ColumnGathererStream.cpp +++ b/dbms/src/DataStreams/ColumnGathererStream.cpp @@ -63,7 +63,8 @@ Block ColumnGathererStream::readImpl() MutableColumnPtr output_column = column.column->cloneEmpty(); output_block = Block{column.cloneEmpty()}; 
output_column->gather(*this); - output_block.getByPosition(0).column = std::move(output_column); + if (!output_column->empty()) + output_block.getByPosition(0).column = std::move(output_column); return output_block; } From d4e67fa64cb19f6e646672e18f4bb82d1a650aaa Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 28 Mar 2019 19:58:16 +0300 Subject: [PATCH 67/78] Another bug fixed to the comparison --- dbms/src/Functions/FunctionsStringSearch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 8f0e11b5e81..9a3419f4b46 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -651,7 +651,7 @@ struct MultiMatchAnyImpl for (size_t i = 0; i < haystack_offsets_size; ++i) { UInt64 length = haystack_offsets[i] - offset - 1; - if (length >= std::numeric_limits::max()) + if (length > std::numeric_limits::max()) throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES); res[i] = 0; hs_scan( From 2048e0c18394228a005f2af772715b998e177278 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 28 Mar 2019 20:01:48 +0300 Subject: [PATCH 68/78] Fix sub to sup in string search docs --- docs/en/query_language/functions/string_search_functions.md | 2 +- docs/ru/query_language/functions/string_search_functions.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/query_language/functions/string_search_functions.md b/docs/en/query_language/functions/string_search_functions.md index 183ab93431b..5adc64909ba 100644 --- a/docs/en/query_language/functions/string_search_functions.md +++ b/docs/en/query_language/functions/string_search_functions.md @@ -39,7 +39,7 @@ Returns 1, if at least one string `needle_i` matches the string `haystack` and 0 For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, 
multiSearchAnyCaseInsensitiveUTF8`. -Note: in all `multiSearch*` functions the number of needles should be less than 2<sub>8</sub> because of implementation specification. +Note: in all `multiSearch*` functions the number of needles should be less than 2<sup>8</sup> because of implementation specification. ## match(haystack, pattern) diff --git a/docs/ru/query_language/functions/string_search_functions.md b/docs/ru/query_language/functions/string_search_functions.md index 874553315b6..0050e99c7d1 100644 --- a/docs/ru/query_language/functions/string_search_functions.md +++ b/docs/ru/query_language/functions/string_search_functions.md @@ -34,7 +34,7 @@ Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. -Примечание: во всех функциях `multiSearch*` количество needles должно быть меньше 2<sub>8</sub> из-за внутренностей реализации. +Примечание: во всех функциях `multiSearch*` количество needles должно быть меньше 2<sup>8</sup> из-за внутренностей реализации. ## match(haystack, pattern) Проверка строки на соответствие регулярному выражению pattern. Регулярное выражение **re2**. Синтаксис регулярных выражений **re2** является более ограниченным по сравнению с регулярными выражениями **Perl** ([подробнее](https://github.com/google/re2/wiki/Syntax)). 
From d22d66a552d5ed486efb9f3aecdca7792afe9df8 Mon Sep 17 00:00:00 2001 From: Danila Kutenin Date: Thu, 28 Mar 2019 20:06:04 +0300 Subject: [PATCH 69/78] Fix exception message --- dbms/src/Functions/FunctionsStringSearch.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/FunctionsStringSearch.h b/dbms/src/Functions/FunctionsStringSearch.h index 2d7d0546031..c45eebd0366 100644 --- a/dbms/src/Functions/FunctionsStringSearch.h +++ b/dbms/src/Functions/FunctionsStringSearch.h @@ -246,7 +246,7 @@ public: if (src_arr.size() > std::numeric_limits::max()) throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) + "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(src_arr.size()) + ", should be at most 255", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -337,7 +337,7 @@ public: if (src_arr.size() > LimitArgs) throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) + "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(src_arr.size()) + ", should be at most " + std::to_string(LimitArgs), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); From f4b819c692ca3573103b335a73e7639ace839c71 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 20:52:50 +0300 Subject: [PATCH 70/78] Added TRUNCATE to the list of suggested keywords in clickhouse-client --- dbms/programs/client/Suggest.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/client/Suggest.h b/dbms/programs/client/Suggest.h index 1be611d97b9..6120f875d57 100644 --- a/dbms/programs/client/Suggest.h +++ b/dbms/programs/client/Suggest.h @@ -39,7 +39,7 @@ private: "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", 
"VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", "IN", - "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN" + "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE" }; /// Words are fetched asynchonously. From 3c29b5f611ab2866d40610db9b3e2b857426aa30 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 21:17:44 +0300 Subject: [PATCH 71/78] Fixed bad code --- dbms/src/Core/iostream_debug_helpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Core/iostream_debug_helpers.cpp b/dbms/src/Core/iostream_debug_helpers.cpp index 710305eb3fa..98a9775f15d 100644 --- a/dbms/src/Core/iostream_debug_helpers.cpp +++ b/dbms/src/Core/iostream_debug_helpers.cpp @@ -80,9 +80,9 @@ std::ostream & operator<<(std::ostream & stream, const IColumn & what) stream << "{"; for (size_t i = 0; i < what.size(); ++i) { - stream << applyVisitor(FieldVisitorDump(), what[i]); if (i) stream << ", "; + stream << applyVisitor(FieldVisitorDump(), what[i]); } stream << "}"; From 4aaf9eccce792d4f4aa7e6deed2427cddad2a8b5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 22:12:37 +0300 Subject: [PATCH 72/78] Fixed error --- dbms/src/Common/COWPtr.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Common/COWPtr.h b/dbms/src/Common/COWPtr.h index 6a3394e1685..ab02a330ba9 100644 --- a/dbms/src/Common/COWPtr.h +++ b/dbms/src/Common/COWPtr.h @@ -228,6 +228,10 @@ protected: operator immutable_ptr & () { return value; } operator bool() const { return value != nullptr; } + bool operator! 
() const { return value == nullptr; } + + bool operator== (const chameleon_ptr & rhs) const { return value == rhs.value; } + bool operator!= (const chameleon_ptr & rhs) const { return value != rhs.value; } }; public: From 464542217609b12f296e84ce77d0c97cd1e6956e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 22:13:59 +0300 Subject: [PATCH 73/78] Fixed TSan report in StorageSystemTables --- dbms/src/Storages/System/StorageSystemTables.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 3413e8609f4..45691072461 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -174,9 +174,8 @@ protected: for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next()) { auto table_name = tables_it->name(); - const auto table = context.tryGetTable(database_name, table_name); - if (!table) - continue; + const StoragePtr & table = tables_it->table(); + auto lock = table->lockStructureForShare(false, context.getCurrentQueryId()); ++rows_count; @@ -190,13 +189,13 @@ protected: res_columns[res_index++]->insert(table_name); if (columns_mask[src_index++]) - res_columns[res_index++]->insert(tables_it->table()->getName()); + res_columns[res_index++]->insert(table->getName()); if (columns_mask[src_index++]) res_columns[res_index++]->insert(0u); // is_temporary if (columns_mask[src_index++]) - res_columns[res_index++]->insert(tables_it->table()->getDataPath()); + res_columns[res_index++]->insert(table->getDataPath()); if (columns_mask[src_index++]) res_columns[res_index++]->insert(database->getTableMetadataPath(table_name)); From 350a0fe1297f262ff002068fd20cb3ff62317c3b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 22:58:41 +0300 Subject: [PATCH 74/78] Miscellaneous --- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 40 
+++++++++---------- dbms/src/Storages/MergeTree/MergeTreeData.h | 7 ++-- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 139f8276a24..a41ff8d8c8a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -611,7 +611,7 @@ String MergeTreeData::MergingParams::getModeName() const Int64 MergeTreeData::getMaxBlockNumber() { - std::lock_guard lock_all(data_parts_mutex); + auto lock = lockParts(); Int64 max_block_num = 0; for (const DataPartPtr & part : data_parts_by_info) @@ -640,7 +640,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) DataPartsVector broken_parts_to_detach; size_t suspicious_broken_parts = 0; - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); data_parts_indexes.clear(); for (const String & file_name : part_file_names) @@ -866,7 +866,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts() std::vector parts_to_delete; { - std::lock_guard lock_parts(data_parts_mutex); + auto parts_lock = lockParts(); auto outdated_parts_range = getDataPartsStateRange(DataPartState::Outdated); for (auto it = outdated_parts_range.begin(); it != outdated_parts_range.end(); ++it) @@ -900,7 +900,7 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts() void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector & parts) { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); for (auto & part : parts) { /// We should modify it under data_parts_mutex @@ -912,7 +912,7 @@ void MergeTreeData::rollbackDeletingParts(const MergeTreeData::DataPartsVector & void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & parts) { { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); /// TODO: use data_parts iterators instead of pointers for (auto & part : parts) @@ -980,7 +980,7 @@ void 
MergeTreeData::dropAllData() { LOG_TRACE(log, "dropAllData: waiting for locks."); - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); LOG_TRACE(log, "dropAllData: removing data from memory."); @@ -1717,7 +1717,7 @@ MergeTreeData::DataPartsVector MergeTreeData::renameTempPartAndReplace( DataPartsVector covered_parts; { - std::unique_lock lock(data_parts_mutex); + auto lock = lockParts(); renameTempPartAndReplace(part, increment, out_transaction, lock, &covered_parts); } return covered_parts; @@ -1814,7 +1814,7 @@ restore_covered) { LOG_INFO(log, "Renaming " << part_to_detach->relative_path << " to " << prefix << part_to_detach->name << " and forgiving it."); - auto data_parts_lock = lockParts(); + auto lock = lockParts(); auto it_part = data_parts_by_info.find(part_to_detach->info); if (it_part == data_parts_by_info.end()) @@ -1931,7 +1931,7 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) { DataPartPtr part_to_delete; { - std::lock_guard lock_parts(data_parts_mutex); + auto lock = lockParts(); LOG_TRACE(log, "Trying to immediately remove part " << part->getNameWithState()); @@ -1967,7 +1967,7 @@ size_t MergeTreeData::getTotalActiveSizeInBytes() const { size_t res = 0; { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); for (auto & part : getDataPartsStateRange(DataPartState::Committed)) res += part->bytes_on_disk; @@ -1979,7 +1979,7 @@ size_t MergeTreeData::getTotalActiveSizeInBytes() const size_t MergeTreeData::getMaxPartsCountForPartition() const { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); size_t res = 0; size_t cur_count = 0; @@ -2006,7 +2006,7 @@ size_t MergeTreeData::getMaxPartsCountForPartition() const std::optional MergeTreeData::getMinPartDataVersion() const { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); std::optional result; for (const DataPartPtr & part : getDataPartsStateRange(DataPartState::Committed)) @@ -2088,8 +2088,8 @@ 
MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart( MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const MergeTreePartInfo & part_info) { - DataPartsLock data_parts_lock(data_parts_mutex); - return getActiveContainingPart(part_info, DataPartState::Committed, data_parts_lock); + auto lock = lockParts(); + return getActiveContainingPart(part_info, DataPartState::Committed, lock); } MergeTreeData::DataPartPtr MergeTreeData::getActiveContainingPart(const String & part_name) @@ -2103,7 +2103,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(Merg { DataPartStateAndPartitionID state_with_partition{state, partition_id}; - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); return DataPartsVector( data_parts_by_state_and_info.lower_bound(state_with_partition), data_parts_by_state_and_info.upper_bound(state_with_partition)); @@ -2112,7 +2112,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVectorInPartition(Merg MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) @@ -2331,7 +2331,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context String partition_id = partition.getID(*this); { - DataPartsLock data_parts_lock(data_parts_mutex); + auto data_parts_lock = lockParts(); DataPartPtr existing_part_in_partition = getAnyPartInPartition(partition_id, data_parts_lock); if (existing_part_in_partition && existing_part_in_partition->partition.value != partition.value) { @@ -2352,7 +2352,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartS DataPartsVector res; DataPartsVector buf; { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); for (auto state : affordable_states) { 
@@ -2378,7 +2378,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getAllDataPartsVector(MergeTreeDat { DataPartsVector res; { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); res.assign(data_parts_by_info.begin(), data_parts_by_info.end()); if (out_states != nullptr) @@ -2396,7 +2396,7 @@ MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affo { DataParts res; { - std::lock_guard lock(data_parts_mutex); + auto lock = lockParts(); for (auto state : affordable_states) { auto range = getDataPartsStateRange(state); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index ed2707c32d4..055a4d63b15 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -538,8 +538,7 @@ public: size_t getColumnCompressedSize(const std::string & name) const { - std::lock_guard lock{data_parts_mutex}; - + auto lock = lockParts(); const auto it = column_sizes.find(name); return it == std::end(column_sizes) ? 0 : it->second.data_compressed; } @@ -547,14 +546,14 @@ public: using ColumnSizeByName = std::unordered_map; ColumnSizeByName getColumnSizes() const { - std::lock_guard lock{data_parts_mutex}; + auto lock = lockParts(); return column_sizes; } /// Calculates column sizes in compressed form for the current state of data_parts. 
void recalculateColumnSizes() { - std::lock_guard lock{data_parts_mutex}; + auto lock = lockParts(); calculateColumnSizesImpl(); } From 3fb9814e3ef0ed4c03322c5b67fe04655b17f76b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 28 Mar 2019 22:59:07 +0300 Subject: [PATCH 75/78] Fixed race condition in fetchPart --- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index c51070c4b44..fd1859ff6e4 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2655,7 +2655,7 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin if (auto part = data.getPartIfExists(part_info, {MergeTreeDataPart::State::Outdated, MergeTreeDataPart::State::Deleting})) { - LOG_DEBUG(log, "Part " << part->getNameWithState() << " should be deleted after previous attempt before fetch"); + LOG_DEBUG(log, "Part " << part->name << " should be deleted after previous attempt before fetch"); /// Force immediate parts cleanup to delete the part that was left from the previous fetch attempt. 
cleanup_thread.wakeup(); return false; From a22bb08f38e9c3a4c2ee5bf311b8d9a37daff74d Mon Sep 17 00:00:00 2001 From: never lee Date: Fri, 29 Mar 2019 04:51:29 +0800 Subject: [PATCH 76/78] Translate docs/zh/operations/table_engines/mergetree.md (#4827) * translate part of materializedview.md * translate part of docs/zh/operations/table_engines/mergetree.md * translate docs/zh/operations/table_engines/mergetree.md roughly * translate docs/zh/operations/table_engines/mergetree.md more * translate docs/zh/operations/table_engines/mergetree.md completely * a little for mergetree.md * small fix for mergetree.md --- docs/en/operations/table_engines/mergetree.md | 5 +- docs/zh/operations/table_engines/mergetree.md | 301 +++++++++++++++++- 2 files changed, 303 insertions(+), 3 deletions(-) mode change 120000 => 100644 docs/zh/operations/table_engines/mergetree.md diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index ee08913dcd1..ca846756f53 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -103,7 +103,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `date-column` — The name of a column of the type [Date](../../data_types/date.md). ClickHouse automatically creates partitions by month on the basis of this column. The partition names are in the `"YYYYMM"` format. - `sampling_expression` — an expression for sampling. -- `(primary, key)` — primary key. Type — [Tuple()](../../data_types/tuple.md- `index_granularity` — The granularity of an index. The number of data rows between the "marks" of an index. The value 8192 is appropriate for most tasks. +- `(primary, key)` — primary key. Type — [Tuple()](../../data_types/tuple.md) +- `index_granularity` — The granularity of an index. The number of data rows between the "marks" of an index. The value 8192 is appropriate for most tasks. 
**Example** @@ -147,7 +148,7 @@ If the data query specifies: The examples above show that it is always more effective to use an index than a full scan. -A sparse index allows extra strings to be read. When reading a single range of the primary key, up to `index_granularity * 2` extra rows in each data block can be read. In most cases, ClickHouse performance does not degrade when `index_granularity = 8192`. +A sparse index allows extra data to be read. When reading a single range of the primary key, up to `index_granularity * 2` extra rows in each data block can be read. In most cases, ClickHouse performance does not degrade when `index_granularity = 8192`. Sparse indexes allow you to work with a very large number of table rows, because such indexes are always stored in the computer's RAM. diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md deleted file mode 120000 index cc6ac1e5297..00000000000 --- a/docs/zh/operations/table_engines/mergetree.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/operations/table_engines/mergetree.md \ No newline at end of file diff --git a/docs/zh/operations/table_engines/mergetree.md b/docs/zh/operations/table_engines/mergetree.md new file mode 100644 index 00000000000..ac671887dea --- /dev/null +++ b/docs/zh/operations/table_engines/mergetree.md @@ -0,0 +1,300 @@ +# MergeTree {#table_engines-mergetree} + +Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及该家族(`*MergeTree`)中的其他引擎。 + +`MergeTree` 引擎家族的基本理念如下。当你有巨量数据要插入到表中,你要高效地一批批写入数据分片,并希望这些数据分片在后台按照一定规则合并。相比在插入时不断修改(重写)数据进存储,这种策略会高效很多。 + +主要特点: + +- 存储的数据按主键排序。 + + 这让你可以创建一个用于快速检索数据的小稀疏索引。 + +- 允许使用分区,如果指定了 [主键](custom_partitioning_key.md) 的话。 + + 在相同数据集和相同结果集的情况下 ClickHouse 中某些带分区的操作会比普通操作更快。查询中指定了分区键时 ClickHouse 会自动截取分区数据。这也有效增加了查询性能。 + +- 支持数据副本。 + + `ReplicatedMergeTree` 家族的表便是用于此。更多信息,请参阅 [数据副本](replication.md) 一节。 + +- 支持数据采样。 + + 需要的话,你可以给表设置一个采样方法。 + +!!! 
注意 + [Merge](merge.md) 引擎并不属于 `*MergeTree` 家族。 + + +## 建表 {#table_engine-mergetree-creating-a-table} + +``` +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... + INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, + INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 +) ENGINE = MergeTree() +[PARTITION BY expr] +[ORDER BY expr] +[PRIMARY KEY expr] +[SAMPLE BY expr] +[SETTINGS name=value, ...] +``` + +请求参数的描述,参考 [请求描述](../../query_language/create.md) 。 + +**子句** + +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 + +- `PARTITION BY` — [分区键](custom_partitioning_key.md) 。 + + 要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../data_types/date.md) 类型的列。这里该分区名格式会是 `"YYYYMM"` 这样。 + +- `ORDER BY` — 表的排序键。 + + 可以是一组列的元组或任意的表达式。 例如: `ORDER BY (CounterID, EventDate)` 。 + +- `PRIMARY KEY` - 主键,如果要设成 [跟排序键不相同](mergetree.md)。 + + 默认情况下主键跟排序键(由 `ORDER BY` 子句指定)相同。 + 因此,大部分情况下不需要再专门指定一个 `PRIMARY KEY` 子句。 + +- `SAMPLE BY` — 用于抽样的表达式。 + + 如果要用抽样表达式,主键中必须包含这个表达式。例如: + `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))` 。 + +- `SETTINGS` — 影响 `MergeTree` 性能的额外参数: + - `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。默认值,8192 。该列表中所有可用的参数可以从这里查看 [MergeTreeSettings.h](https://github.com/yandex/ClickHouse/blob/master/dbms/src/Storages/MergeTree/MergeTreeSettings.h) 。 + - `use_minimalistic_part_header_in_zookeeper` — 数据分片头在 ZooKeeper 中的存储方式。如果设置了 `use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考『服务配置参数』这章中的 [设置描述](../server_settings/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 + - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据分片时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 
I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 + +**示例配置** + +``` +ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192 +``` + +示例中,我们设为按月分区。 + +同时我们设置了一个按用户ID哈希的抽样表达式。这让你可以有该表中每个 `CounterID` 和 `EventDate` 下面的数据的伪随机分布。如果你在查询时指定了 [SAMPLE](../../query_language/select.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 + +`index_granularity` 可省略,默认值为 8192 。 + +
已弃用的建表方法 + +!!! 注意 + 不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 + +``` +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` + +**MergeTree() 参数** + +- `date-column` — 类型为 [Date](../../data_types/date.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 +- `sampling_expression` — 采样表达式。 +- `(primary, key)` — 主键。类型 — [Tuple()](../../data_types/tuple.md) +- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 + +**示例** + +``` +MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192) +``` + +对于主要的配置方法,这里 `MergeTree` 引擎跟前面的例子一样,可以以同样的方式配置。 +
+ +## 数据存储 + +表由按主键排序的数据 *分片* 组成。 + +当数据被插入到表中时,会分成数据分片并按主键的字典序排序。例如,主键是 `(CounterID, Date)` 时,分片中数据按 `CounterID` 排序,具有相同 `CounterID` 的部分按 `Date` 排序。 + +不同分区的数据会被分成不同的分片,ClickHouse 在后台合并数据分片以便更高效存储。不会合并来自不同分区的数据分片。这个合并机制并不保证相同主键的所有行都会合并到同一个数据分片中。 + +ClickHouse 会为每个数据分片创建一个索引文件,索引文件包含每个索引行(『标记』)的主键值。索引行号定义为 `n * index_granularity` 。最大的 `n` 等于总行数除以 `index_granularity` 的值的整数部分。对于每列,跟主键相同的索引行处也会写入『标记』。这些『标记』让你可以直接找到数据所在的列。 + +你可以只用一单一大表并不断地一块块往里面加入数据 – `MergeTree` 引擎的就是为了这样的场景。 + +## 主键和索引在查询中的表现 {#primary-keys-and-indexes-in-queries} + +我们以 `(CounterID, Date)` 为主键。排序好的索引的图示会是下面这样: + +``` +全部数据 : [-------------------------------------------------------------------------] +CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] +Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] +标记: | | | | | | | | | | | + a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 +标记号: 0 1 2 3 4 5 6 7 8 9 10 +``` + +如果指定查询如下: + +- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 +- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 +- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 + +上面例子可以看出使用索引通常会比全表扫描要高效。 + +稀疏索引会引起额外的数据读取。当读取主键单个区间范围的数据时,每个数据块中最多会多读 `index_granularity * 2` 行额外的数据。大部分情况下,当 `index_granularity = 8192` 时,ClickHouse的性能并不会降级。 + +稀疏索引让你能操作有巨量行的表。因为这些索引是常驻内存(RAM)的。 + +ClickHouse 不要求主键惟一。所以,你可以插入多条具有相同主键的行。 + +### 主键的选择 + +主键中列的数量并没有明确的限制。依据数据结构,你应该让主键包含多些或少些列。这样可以: + +- 改善索引的性能。 + + 如果当前主键是 `(a, b)` ,然后加入另一个 `c` 列,满足下面条件时,则可以改善性能: + - 有带有 `c` 列条件的查询。 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这种的情况很普遍。换言之,就是加入另一列后,可以让你的查询略过很长的数据范围。 + +- 改善数据压缩。 + + ClickHouse 以主键排序分片数据,所以,数据的一致性越高,压缩越好。 + +- [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里,数据合并时,会有额外的处理逻辑。 + + 在这种情况下,指定一个跟主键不同的 *排序键* 也是有意义的。 + +长的主键会对插入性能和内存消耗有负面影响,但主键中额外的列并不影响 `SELECT` 查询的性能。 + + +### 选择跟排序键不一样主键 + +指定一个跟排序键(用于排序数据分片中行的表达式) 
+不一样的主键(用于计算写到索引文件的每个标记值的表达式)是可以的。 +这种情况下,主键表达式元组必须是排序键表达式元组的一个前缀。 + +当使用 [SummingMergeTree](summingmergetree.md) 和 +[AggregatingMergeTree](aggregatingmergetree.md) 引擎时,这个特性非常有用。 +通常,使用这类引擎时,表里列分两种:*维度* 和 *度量* 。 +典型的查询是在 `GROUP BY` 并过虑维度的情况下统计度量列的值。 +像 SummingMergeTree 和 AggregatingMergeTree ,用相同的排序键值统计行时, +通常会加上所有的维度。结果就是,这键的表达式会是一长串的列组成, +并且这组列还会因为新加维度必须频繁更新。 + +这种情况下,主键中仅预留少量列保证高效范围扫描, +剩下的维度列放到排序键元组里。这样是合理的。 + +[排序键的修改](../../query_language/alter.md) 是轻量级的操作,因为一个新列同时被加入到表里和排序键后时,已存在的数据分片并不需要修改。由于旧的排序键是新排序键的前缀,并且刚刚添加的列中没有数据,因此在表修改时的数据对于新旧的排序键来说都是有序的。 + +### 索引和分区在查询中的应用 + +对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为谓词链接一子项或整个)则可以使用索引:基于主键或分区键的列或表达式的部分的等式或比较运算表达式;基于主键或分区键的列或表达式的固定前缀的 `IN` 或 `LIKE` 表达式;基于主键或分区键的列的某些函数;基于主键或分区键的表达式的逻辑表达式。 + +因此,在索引键的一个或多个区间上快速地跑查询都是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等运行查询,都会非常快。 + +当引擎配置如下时: + +``` +ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +``` + +这种情况下,这些查询: + +``` sql +SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34 +SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42) +SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01')) +``` + +ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的分区键剪掉那些不包含符合数据的分区。 + +上文的查询显示,即使索引用于复杂表达式。因为读表操作是组织好的,所以,使用索引不会比完整扫描慢。 + +下面这个例子中,不会使用索引。 + +``` sql +SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' +``` + +要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force_index_by_date](../settings/settings.md#settings-force_index_by_date) 和 [force_primary_key](../settings/settings.md) 。 + +按月分区的分区键是只能读取包含适当范围日期的数据块。这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有带主键前缀条件的查询将会导致读取超过这个日期范围。 + + +### 跳数索引(分段汇总索引,实验性的) + +需要设置 
`allow_experimental_data_skipping_indices` 为 1 才能使用此索引。(执行 `SET allow_experimental_data_skipping_indices = 1`)。 + +此索引在 `CREATE` 语句的列部分里定义。 +```sql +INDEX index_name expr TYPE type(...) GRANULARITY granularity_value +``` + +`*MergeTree` 家族的表都能指定跳数索引。 + +这些索引是由数据块按粒度分割后的每部分在指定表达式上汇总信息 `granularity_value` 组成(粒度大小用表引擎里 `index_granularity` 的指定)。 +这些汇总信息有助于用 `where` 语句跳过大片不满足的数据,从而减少 `SELECT` 查询从磁盘读取的数据量, + + +示例 +```sql +CREATE TABLE table_name +( + u64 UInt64, + i32 Int32, + s String, + ... + INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, + INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4 +) ENGINE = MergeTree() +... +``` + +上例中的索引能让 ClickHouse 执行下面这些查询时减少读取数据量。 +```sql +SELECT count() FROM table WHERE s < 'z' +SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +``` + +#### 索引的可用类型 + +* `minmax` +存储指定表达式的极值(如果表达式是 `tuple` ,则存储 `tuple` 中每个元素的极值),这些信息用于跳过数据块,类似主键。 + +* `set(max_rows)` +存储指定表达式的惟一值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查 `WHERE` 表达式是否满足某个数据块。 + +* `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +存储包含数据块中所有 n 元短语的 [布隆过滤器](https://en.wikipedia.org/wiki/Bloom_filter) 。只可用在字符串上。 +可用于优化 `equals` , `like` 和 `in` 表达式的性能。 +`n` -- 短语长度。 +`size_of_bloom_filter_in_bytes` -- 布隆过滤器大小,单位字节。(因为压缩得好,可以指定比较大的值,如256或512)。 +`number_of_hash_functions` -- 布隆过滤器中使用的 hash 函数的个数。 +`random_seed` -- hash 函数的随机种子。 + +* `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +跟 `ngrambf_v1` 类似,不同于 ngrams 存储字符串指定长度的所有片段。它只存储被非字母数据字符分割的片段。 + +```sql +INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 +INDEX sample_index2 (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4 +INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4 +``` + + +## 并发数据访问 + +应对表的并发访问,我们使用多版本机制。换言之,当同时读和更新表时,数据从当前查询到的一组分片中读取。没有冗长的的锁。插入不会阻碍读取。 + +对表的读操作是自动并行的。 + + +[来源文章](https://clickhouse.yandex/docs/en/operations/table_engines/mergetree/) 
From 46be1608a60d554d8d4d8d0ee917d77b3016128b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 29 Mar 2019 02:17:22 +0300 Subject: [PATCH 77/78] Fixed error #4836 --- dbms/src/Storages/System/StorageSystemTables.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/System/StorageSystemTables.cpp b/dbms/src/Storages/System/StorageSystemTables.cpp index 45691072461..e962b9883b3 100644 --- a/dbms/src/Storages/System/StorageSystemTables.cpp +++ b/dbms/src/Storages/System/StorageSystemTables.cpp @@ -21,6 +21,7 @@ namespace DB namespace ErrorCodes { extern const int CANNOT_GET_CREATE_TABLE_QUERY; + extern const int TABLE_IS_DROPPED; } @@ -175,7 +176,19 @@ protected: { auto table_name = tables_it->name(); const StoragePtr & table = tables_it->table(); - auto lock = table->lockStructureForShare(false, context.getCurrentQueryId()); + + TableStructureReadLockHolder lock; + + try + { + lock = table->lockStructureForShare(false, context.getCurrentQueryId()); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::TABLE_IS_DROPPED) + continue; + throw; + } ++rows_count; From d086a63f66be93a01b7e688af450dc533f4c8171 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 29 Mar 2019 11:26:42 +0300 Subject: [PATCH 78/78] clickhouse-test retry on session expired --- dbms/tests/clickhouse-test | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test index 242dc17de0b..ac9dcde1f36 100755 --- a/dbms/tests/clickhouse-test +++ b/dbms/tests/clickhouse-test @@ -31,6 +31,7 @@ MSG_OK = OP_SQUARE_BRACKET + colored(" OK ", "green", attrs=['bold']) + CL_SQUAR MSG_SKIPPED = OP_SQUARE_BRACKET + colored(" SKIPPED ", "cyan", attrs=['bold']) + CL_SQUARE_BRACKET MESSAGES_TO_RETRY = [ + "DB::Exception: ZooKeeper session has been expired", "Coordination::Exception: Connection loss", ]