From a1cdb0049d34d6bf6b4f0058a2906c5084c94657 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 13:47:22 +0300 Subject: [PATCH 01/24] Add test for user exception check --- .../__init__.py | 0 .../configs/remote_servers.xml | 16 ++++++ .../configs/user_restrictions.xml | 38 +++++++++++++ .../test.py | 54 +++++++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/__init__.py create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/__init__.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml new file mode 100644 index 00000000000..3593cbd7f36 --- /dev/null +++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml @@ -0,0 +1,16 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + + + diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml new file mode 100644 index 00000000000..bd91f1d495c --- /dev/null +++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml @@ -0,0 +1,38 @@ + + + + 10000000000 + 0 + random + + + 10000000000 + 0 + random + 2 + + + + + + + ::/0 + + default + default + + + + + ::/0 + + good + default + + + + + + + + diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py new file mode 100644 index 00000000000..665f0877586 --- /dev/null +++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py @@ -0,0 +1,54 @@ +import time + +import pytest + +from multiprocessing.dummy import Pool +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml']) +node2 = cluster.add_instance('node2', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml']) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + for num, node in enumerate([node1, node2]): + node.query("create table real_tbl (ID UInt64, Value String) ENGINE = MergeTree() order by tuple()") + node.query("insert into real_tbl values(0, '0000'), (1, '1111')") + node.query("create table distr_tbl (ID UInt64, Value String) ENGINE Distributed(test_cluster, default, real_tbl)") + + node1.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") + node1.query("insert into nums values(0),(1)") + + yield cluster + finally: + cluster.shutdown() + +def num_getter(num): + if num % 2 == 0: + return node1 + else: + return node2 + +@pytest.mark.parametrize("node_getter", [ + (lambda _: node1), + (lambda _: 
node2), + (num_getter), +]) +def test_exception_message(started_cluster, node_getter): + assert node1.query("select ID from distr_tbl order by ID") == "0\n1\n" + assert node1.query("select number from nums order by number") == "0\n1\n" + try: + p = Pool(10) + def query(num): + node = node_getter(num) + node.query( + "select sleep(2) from distr_tbl where ID GLOBAL IN (select number from remote('node1', 'default', 'nums'))", + user='good') + + p.map(query, xrange(3)) + except Exception as ex: + assert 'Too many simultaneous queries for user good.' in ex.message + print ex.message From cea23a1486af3eab22b35e7ff6422abe74aadc7d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 9 Jan 2019 15:16:03 +0300 Subject: [PATCH 02/24] Fix LowCardinality empty array serialization. #3907 --- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 4 ++++ .../00800_low_cardinality_empty_array.reference | 2 ++ .../0_stateless/00800_low_cardinality_empty_array.sql | 7 +++++++ 3 files changed, 13 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index cf38941b743..01928a3db53 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -508,6 +508,10 @@ void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams( size_t max_limit = column.size() - offset; limit = limit ? std::min(limit, max_limit) : max_limit; + /// Do not write anything for empty column. (May happen while writing empty arrays.) + if (limit == 0) + return; + auto sub_column = low_cardinality_column.cutAndCompact(offset, limit); ColumnPtr positions = sub_column->getIndexesPtr(); ColumnPtr keys = sub_column->getDictionary().getNestedColumn(); diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference new file mode 100644 index 00000000000..c71bf50e82f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference @@ -0,0 +1,2 @@ +[] +[] diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql new file mode 100644 index 00000000000..0f02f6aa2d5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql @@ -0,0 +1,7 @@ +drop table if exists test.lc; +create table test.lc (names Array(LowCardinality(String))) engine=MergeTree order by tuple(); +insert into test.lc values ([]); +insert into test.lc select emptyArrayString(); +select * from test.lc; +drop table if exists test.lc; + From b8efafd400d37c0be10706c6485997c51a78386e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 15:21:04 +0300 Subject: [PATCH 03/24] Fix bug with wrong user restrictions in remote table func --- .../ClusterProxy/executeQuery.cpp | 21 +++++++---- .../Interpreters/ClusterProxy/executeQuery.h | 4 +++ .../Storages/getStructureOfRemoteTable.cpp | 6 +++- .../configs/remote_servers.xml | 16 --------- .../test.py | 35 ++++++++----------- 5 files changed, 37 insertions(+), 45 deletions(-) delete mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp 
b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp index 27b7d8338af..4b9aa713f07 100644 --- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -14,14 +14,8 @@ namespace DB namespace ClusterProxy { -BlockInputStreams executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, - const ASTPtr & query_ast, const Context & context, const Settings & settings) +Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings) { - BlockInputStreams res; - - const std::string query = queryToString(query_ast); - Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -39,6 +33,19 @@ BlockInputStreams executeQuery( Context new_context(context); new_context.setSettings(new_settings); + return new_context; +} + +BlockInputStreams executeQuery( + IStreamFactory & stream_factory, const ClusterPtr & cluster, + const ASTPtr & query_ast, const Context & context, const Settings & settings) +{ + BlockInputStreams res; + + const std::string query = queryToString(query_ast); + + Context new_context = removeUserRestrictionsFromSettings(context, settings); + ThrottlerPtr user_level_throttler; if (auto process_list_element = context.getProcessListElement()) user_level_throttler = process_list_element->getUserNetworkThrottler(); diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.h b/dbms/src/Interpreters/ClusterProxy/executeQuery.h index b12fc2b4646..5c07c287954 100644 --- a/dbms/src/Interpreters/ClusterProxy/executeQuery.h +++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.h @@ -16,6 +16,10 @@ namespace ClusterProxy class IStreamFactory; +/// removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.) +/// from settings and creates new context with them +Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings); + /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read. /// `stream_factory` object encapsulates the logic of creating streams for a different type of query /// (currently SELECT, DESCRIBE). 
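Aside on the pattern this patch introduces: internal service queries (such as the DESCRIBE that remote() issues to discover a table's structure, handled in the next hunk) should run against a context whose per-user limits have been cleared, while the originating user's context keeps its limits. Below is a minimal self-contained sketch of that idea, with Context and Settings reduced to toy structs; only the two per-user limits named in the header comment above are modeled, and the real helper is DB::ClusterProxy::removeUserRestrictionsFromSettings, not this function.

    #include <iostream>

    // Toy stand-ins for DB::Context / DB::Settings; 0 is taken to mean "unlimited".
    struct Settings
    {
        unsigned max_concurrent_queries_for_user = 0;
        unsigned max_memory_usage_for_user = 0;
    };

    struct Context
    {
        Settings settings;
    };

    // Copy the context and clear per-user limits, leaving the caller's intact.
    Context removeUserRestrictions(const Context & context)
    {
        Context new_context = context;
        new_context.settings.max_concurrent_queries_for_user = 0;
        new_context.settings.max_memory_usage_for_user = 0;
        return new_context;
    }

    int main()
    {
        Context user_context;
        user_context.settings.max_concurrent_queries_for_user = 2;

        Context internal = removeUserRestrictions(user_context);  // for DESCRIBE etc.
        std::cout << internal.settings.max_concurrent_queries_for_user << "\n";      // 0
        std::cout << user_context.settings.max_concurrent_queries_for_user << "\n";  // 2
    }

Returning a fresh copy rather than mutating the caller's context is what keeps the user-visible limits (and the integration test above) intact.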
diff --git a/dbms/src/Storages/getStructureOfRemoteTable.cpp b/dbms/src/Storages/getStructureOfRemoteTable.cpp index 174ec49a4f1..1e5b37d62d0 100644 --- a/dbms/src/Storages/getStructureOfRemoteTable.cpp +++ b/dbms/src/Storages/getStructureOfRemoteTable.cpp @@ -1,6 +1,7 @@ #include "getStructureOfRemoteTable.h" #include #include +#include #include #include #include @@ -54,7 +55,10 @@ ColumnsDescription getStructureOfRemoteTable( ColumnsDescription res; - auto input = std::make_shared(shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), context); + + auto new_context = ClusterProxy::removeUserRestrictionsFromSettings(context, context.getSettingsRef()); + /// Execute remote query without restrictions (because it's not real user query, but part of implementation) + auto input = std::make_shared(shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), new_context); input->setPoolMode(PoolMode::GET_ONE); if (!table_func_ptr) input->setMainTable(QualifiedTableName{database, table}); diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml deleted file mode 100644 index 3593cbd7f36..00000000000 --- a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - node1 - 9000 - - - node2 - 9000 - - - - - diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py index 665f0877586..26a42637063 100644 --- a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py +++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py @@ -22,33 +22,26 @@ def started_cluster(): node1.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") node1.query("insert into nums values(0),(1)") + node2.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") + node2.query("insert into nums values(0),(1)") + yield cluster finally: cluster.shutdown() -def num_getter(num): - if num % 2 == 0: - return node1 - else: - return node2 - -@pytest.mark.parametrize("node_getter", [ - (lambda _: node1), - (lambda _: node2), - (num_getter), -]) -def test_exception_message(started_cluster, node_getter): +def test_exception_message(started_cluster): assert node1.query("select ID from distr_tbl order by ID") == "0\n1\n" assert node1.query("select number from nums order by number") == "0\n1\n" - try: - p = Pool(10) - def query(num): - node = node_getter(num) - node.query( - "select sleep(2) from distr_tbl where ID GLOBAL IN (select number from remote('node1', 'default', 'nums'))", - user='good') - p.map(query, xrange(3)) + def node_busy(_): + for i in xrange(10): + node1.query("select sleep(2)", user='default') + + busy_pool = Pool(3) + busy_pool.map_async(node_busy, xrange(3)) + time.sleep(1) # wait a little until polling start + try: + assert node2.query("select number from remote('node1', 'default', 'nums')", user='good') == "0\n1\n" except Exception as ex: - assert 'Too many simultaneous queries for user good.' 
in ex.message print ex.message + assert False, "Exception thrown while max_concurrent_queries_for_user is not exceeded" From f00b7ba08a3699fd299440c7b7a91adeb57d4d0d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 15:23:41 +0300 Subject: [PATCH 04/24] Simplify test --- .../test.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py index 26a42637063..4b7cc87c15a 100644 --- a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py +++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py @@ -7,30 +7,21 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml']) -node2 = cluster.add_instance('node2', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml']) +node1 = cluster.add_instance('node1', user_configs=['configs/user_restrictions.xml']) +node2 = cluster.add_instance('node2', user_configs=['configs/user_restrictions.xml']) @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - for num, node in enumerate([node1, node2]): - node.query("create table real_tbl (ID UInt64, Value String) ENGINE = MergeTree() order by tuple()") - node.query("insert into real_tbl values(0, '0000'), (1, '1111')") - node.query("create table distr_tbl (ID UInt64, Value String) ENGINE Distributed(test_cluster, default, real_tbl)") - node1.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") node1.query("insert into nums values(0),(1)") - node2.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") - node2.query("insert into nums values(0),(1)") - yield cluster finally: cluster.shutdown() def test_exception_message(started_cluster): - assert node1.query("select ID from distr_tbl order by ID") == "0\n1\n" assert node1.query("select number from nums order by number") == "0\n1\n" def node_busy(_): @@ -39,7 +30,7 @@ def test_exception_message(started_cluster): busy_pool = Pool(3) busy_pool.map_async(node_busy, xrange(3)) - time.sleep(1) # wait a little until polling start + time.sleep(1) # wait a little until polling starts try: assert node2.query("select number from remote('node1', 'default', 'nums')", user='good') == "0\n1\n" except Exception as ex: From 87c4443f986b2c225577af6203a3e87eb40c2c2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 17:34:25 +0300 Subject: [PATCH 05/24] Removed outdated test --- dbms/src/Storages/tests/CMakeLists.txt | 3 - dbms/src/Storages/tests/seek_speed_test.cpp | 68 --------------------- 2 files changed, 71 deletions(-) delete mode 100644 dbms/src/Storages/tests/seek_speed_test.cpp diff --git a/dbms/src/Storages/tests/CMakeLists.txt b/dbms/src/Storages/tests/CMakeLists.txt index 2942ecfe6bc..007219a3b96 100644 --- a/dbms/src/Storages/tests/CMakeLists.txt +++ b/dbms/src/Storages/tests/CMakeLists.txt @@ -4,9 +4,6 @@ target_link_libraries (system_numbers PRIVATE dbms clickhouse_storages_system cl add_executable (storage_log storage_log.cpp) target_link_libraries (storage_log PRIVATE dbms) -add_executable (seek_speed_test seek_speed_test.cpp) -target_link_libraries (seek_speed_test PRIVATE dbms) - add_executable (part_checker part_checker.cpp) target_link_libraries 
(part_checker PRIVATE dbms) diff --git a/dbms/src/Storages/tests/seek_speed_test.cpp b/dbms/src/Storages/tests/seek_speed_test.cpp deleted file mode 100644 index 70910d65b5d..00000000000 --- a/dbms/src/Storages/tests/seek_speed_test.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -/** We test the hypothesis that skipping unnecessary parts of seek-forward never degrades overall read speed. - * Before the measurements, it is desirable to discard disk cache: `echo 3 > /proc/sys/vm/drop_caches`. - * - * Result: yes, even frequent relatively short seek forward does not worsen anything on all tested parameters - * - 1MiB of data, 16 0 0 16 vs 16 16 32 16 - * - 1GiB of data, 1048576 0 0 vs 1048576 512 1024 vs 1048576 1048576 1048576 - * - 1GiB of data, 1024 0 0 vs 1024 512 1024 - */ - -int main(int argc, const char ** argv) -{ - if (argc < 5 || argc > 6) - { - std::cerr << "Usage:\n" - << argv[0] << " file bytes_in_block min_skip_bytes max_skip_bytes [buffer_size]" << std::endl; - return 0; - } - - int block = atoi(argv[2]); - int min_skip = atoi(argv[3]); - int max_skip = atoi(argv[4]); - size_t buf_size = argc <= 5 ? DBMS_DEFAULT_BUFFER_SIZE : static_cast(atoi(argv[5])); - - UInt64 size = Poco::File(argv[1]).getSize(); - UInt64 pos = 0; - DB::ReadBufferFromFile in(argv[1], buf_size); - auto buf = std::make_unique(block); - int checksum = 0; - UInt64 bytes_read = 0; - - Stopwatch watch; - - while (!in.eof()) - { - UInt64 len = static_cast(rand() % (max_skip - min_skip + 1) + min_skip); - len = std::min(len, size - pos); - off_t seek_res = in.seek(len, SEEK_CUR); - pos += len; - if (seek_res != static_cast(pos)) - { - std::cerr << "Unexpected seek return value: " << seek_res << "; expeted " << pos << ", seeking by " << len << std::endl; - return 1; - } - len = std::min(static_cast(block), size - pos); - in.read(buf.get(), len); - checksum += buf[0] + buf[block - 1]; - pos += len; - bytes_read += len; - } - watch.stop(); - - std::cout << checksum << std::endl; /// don't optimize - - std::cout << "Read " << bytes_read << " out of " << size << " bytes in " - << std::setprecision(4) << watch.elapsedSeconds() << " seconds (" - << bytes_read / watch.elapsedSeconds() / 1000000 << " MB/sec.)" << std::endl; - - return 0; -} From 5ab362a303725368ee1f9db26179b37f39f62fad Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 9 Jan 2019 17:47:51 +0300 Subject: [PATCH 06/24] Fix distinct by single LowCardinality numeric column. 
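To see why isFixedAndContiguous() must return false for ColumnLowCardinality, consider what the single-numeric-key fast path in SetVariants does: it deduplicates by the column's raw fixed-size bytes. For a LowCardinality column those bytes are dictionary indexes, and equal values in different blocks need not share an index. The following is a toy model of that failure mode, not ClickHouse code: plain vectors stand in for the column, and the exact mechanics in ClickHouse differ in detail.

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <unordered_set>
    #include <vector>

    // Dictionary-encoded ("low cardinality") block: per-row indexes into a dictionary.
    struct LowCardinalityToy
    {
        std::vector<uint64_t> dictionary;  // unique values of this block
        std::vector<uint8_t> indexes;      // one entry per row
        uint64_t valueAt(size_t row) const { return dictionary[indexes[row]]; }
    };

    int main()
    {
        // Two blocks with different dictionaries: values {7, 9} and {9, 8}.
        LowCardinalityToy block1{{7, 9}, {0, 1}};
        LowCardinalityToy block2{{9, 8}, {0, 1}};

        std::unordered_set<uint8_t> by_index;   // mimics dedup by raw fixed-size bytes
        std::unordered_set<uint64_t> by_value;  // dedup by the decoded values
        for (const auto * b : {&block1, &block2})
            for (size_t row = 0; row < b->indexes.size(); ++row)
            {
                by_index.insert(b->indexes[row]);
                by_value.insert(b->valueAt(row));
            }

        std::cout << "distinct by raw index bytes: " << by_index.size() << "\n";  // 2 (wrong)
        std::cout << "distinct by decoded values:  " << by_value.size() << "\n";  // 3 (right)
    }

With the fix, a LowCardinality column no longer takes the raw-bytes fast path in SetVariants::chooseMethod.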
--- dbms/src/Columns/ColumnLowCardinality.h | 2 +- dbms/src/Interpreters/SetVariants.cpp | 2 +- ...low_cardinality_distinct_numeric.reference | 123 ++++++++++++++++++ ...00800_low_cardinality_distinct_numeric.sql | 7 + 4 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index bfca6e41123..34a3db8589e 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -133,7 +133,7 @@ public: } bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); } - bool isFixedAndContiguous() const override { return getDictionary().isFixedAndContiguous(); } + bool isFixedAndContiguous() const override { return false; } size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } bool isNumeric() const override { return getDictionary().isNumeric(); } bool lowCardinality() const override { return true; } diff --git a/dbms/src/Interpreters/SetVariants.cpp b/dbms/src/Interpreters/SetVariants.cpp index f0d9bbb2af8..6f457ed0bed 100644 --- a/dbms/src/Interpreters/SetVariants.cpp +++ b/dbms/src/Interpreters/SetVariants.cpp @@ -137,7 +137,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose } /// If there is one numeric key that fits into 64 bits - if (keys_size == 1 && nested_key_columns[0]->isNumeric()) + if (keys_size == 1 && nested_key_columns[0]->isNumeric() && !nested_key_columns[0]->lowCardinality()) { size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed(); if (size_of_field == 1) diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference new file mode 100644 index 00000000000..a39df1e16c0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference @@ -0,0 +1,123 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql new file mode 100644 index 00000000000..d3ba9138a8f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql @@ -0,0 +1,7 @@ +set allow_experimental_low_cardinality_type = 1; +drop table if exists test.lc; +create table test.lc (val LowCardinality(UInt64)) engine = MergeTree order by val; +insert into test.lc select number % 123 from system.numbers limit 100000; +select distinct(val) from test.lc order by val; +drop table if exists test.lc; + From 19f465295940383a314d7957f20826880ed495ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 18:44:20 +0300 Subject: [PATCH 07/24] Fixed bugs found by PVS-Studio --- dbms/programs/copier/ClusterCopier.cpp | 62 +++++++++---------- 
dbms/programs/obfuscator/Obfuscator.cpp | 4 +- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 +- dbms/src/Compression/CompressionCodecZSTD.cpp | 2 +- .../MergeSortingBlockInputStream.cpp | 2 +- dbms/src/Dictionaries/Embedded/RegionsNames.h | 2 +- dbms/src/Functions/hasColumnInTable.cpp | 2 +- .../Interpreters/InterpreterInsertQuery.cpp | 1 - .../Interpreters/InterpreterSystemQuery.cpp | 3 +- .../src/Interpreters/MutationsInterpreter.cpp | 3 +- .../PredicateExpressionsOptimizer.cpp | 8 +++ .../PredicateExpressionsOptimizer.h | 6 -- dbms/src/Interpreters/tests/hash_map.cpp | 29 +++++---- dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../ReplicatedMergeTreeCleanupThread.cpp | 5 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 3 +- .../TableFunctions/TableFunctionNumbers.cpp | 2 +- libs/libcommon/include/common/Types.h | 21 ------- libs/libcommon/src/tests/CMakeLists.txt | 2 +- utils/compressor/mutator.cpp | 4 +- 22 files changed, 77 insertions(+), 94 deletions(-) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 91ccdc88275..203a7f0cebb 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -243,7 +243,7 @@ struct ClusterPartition UInt64 rows_copied = 0; UInt64 blocks_copied = 0; - size_t total_tries = 0; + UInt64 total_tries = 0; }; @@ -340,7 +340,7 @@ struct TaskCluster String default_local_database; /// Limits number of simultaneous workers - size_t max_workers = 0; + UInt64 max_workers = 0; /// Base settings for pull and push Settings settings_common; @@ -773,11 +773,11 @@ public: } template - decltype(auto) retry(T && func, size_t max_tries = 100) + decltype(auto) retry(T && func, UInt64 max_tries = 100) { std::exception_ptr exception; - for (size_t try_number = 1; try_number <= max_tries; ++try_number) + for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) { try { @@ -880,7 +880,7 @@ public: } /// Compute set of partitions, assume set of partitions aren't changed during the processing - void discoverTablePartitions(TaskTable & task_table, size_t num_threads = 0) + void discoverTablePartitions(TaskTable & task_table, UInt64 num_threads = 0) { /// Fetch partitions list from a shard { @@ -985,7 +985,7 @@ public: /// Retry table processing bool table_is_done = false; - for (size_t num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries) + for (UInt64 num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries) { if (tryProcessTable(task_table)) { @@ -1044,7 +1044,7 @@ protected: String workers_path = getWorkersPath(); String current_worker_path = getCurrentWorkerNodePath(); - size_t num_bad_version_errors = 0; + UInt64 num_bad_version_errors = 0; while (true) { @@ -1055,7 +1055,7 @@ protected: auto version = stat.version; zookeeper->get(workers_path, &stat); - if (static_cast(stat.numChildren) >= task_cluster->max_workers) + if (static_cast(stat.numChildren) >= task_cluster->max_workers) { LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")" << ". 
Postpone processing " << description); @@ -1163,7 +1163,7 @@ protected: } // If all task is finished and zxid is not changed then partition could not become dirty again - for (size_t shard_num = 0; shard_num < status_paths.size(); ++shard_num) + for (UInt64 shard_num = 0; shard_num < status_paths.size(); ++shard_num) { if (zxid1[shard_num] != zxid2[shard_num]) { @@ -1280,7 +1280,7 @@ protected: LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query); /// Limit number of max executing replicas to 1 - size_t num_shards = executeQueryOnCluster(cluster_push, query, nullptr, &settings_push, PoolMode::GET_ONE, 1); + UInt64 num_shards = executeQueryOnCluster(cluster_push, query, nullptr, &settings_push, PoolMode::GET_ONE, 1); if (num_shards < cluster_push->getShardCount()) { @@ -1299,8 +1299,8 @@ protected: } - static constexpr size_t max_table_tries = 1000; - static constexpr size_t max_shard_partition_tries = 600; + static constexpr UInt64 max_table_tries = 1000; + static constexpr UInt64 max_shard_partition_tries = 600; bool tryProcessTable(TaskTable & task_table) { @@ -1317,7 +1317,7 @@ protected: Stopwatch watch; TasksShard expected_shards; - size_t num_failed_shards = 0; + UInt64 num_failed_shards = 0; ++cluster_partition.total_tries; @@ -1368,7 +1368,7 @@ protected: bool is_unprioritized_task = !previous_shard_is_instantly_finished && shard->priority.is_remote; PartitionTaskStatus task_status = PartitionTaskStatus::Error; bool was_error = false; - for (size_t try_num = 0; try_num < max_shard_partition_tries; ++try_num) + for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num) { task_status = tryProcessPartitionTask(partition, is_unprioritized_task); @@ -1434,8 +1434,8 @@ protected: } } - size_t required_partitions = task_table.cluster_partitions.size(); - size_t finished_partitions = task_table.finished_cluster_partitions.size(); + UInt64 required_partitions = task_table.cluster_partitions.size(); + UInt64 finished_partitions = task_table.finished_cluster_partitions.size(); bool table_is_done = finished_partitions >= required_partitions; if (!table_is_done) @@ -1645,7 +1645,7 @@ protected: String query = queryToString(create_query_push_ast); LOG_DEBUG(log, "Create destination tables. 
Query: " << query); - size_t shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, + UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, PoolMode::GET_MANY); LOG_DEBUG(log, "Destination tables " << getDatabaseDotTable(task_table.table_push) << " have been created on " << shards << " shards of " << task_table.cluster_push->getShardCount()); @@ -1699,7 +1699,7 @@ protected: std::future future_is_dirty_checker; Stopwatch watch(CLOCK_MONOTONIC_COARSE); - constexpr size_t check_period_milliseconds = 500; + constexpr UInt64 check_period_milliseconds = 500; /// Will asynchronously check that ZooKeeper connection and is_dirty flag appearing while copy data auto cancel_check = [&] () @@ -1917,16 +1917,16 @@ protected: /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster * Returns number of shards for which at least one replica executed query successfully */ - size_t executeQueryOnCluster( + UInt64 executeQueryOnCluster( const ClusterPtr & cluster, const String & query, const ASTPtr & query_ast_ = nullptr, const Settings * settings = nullptr, PoolMode pool_mode = PoolMode::GET_ALL, - size_t max_successful_executions_per_shard = 0) const + UInt64 max_successful_executions_per_shard = 0) const { auto num_shards = cluster->getShardsInfo().size(); - std::vector per_shard_num_successful_replicas(num_shards, 0); + std::vector per_shard_num_successful_replicas(num_shards, 0); ASTPtr query_ast; if (query_ast_ == nullptr) @@ -1939,10 +1939,10 @@ protected: /// We need to execute query on one replica at least - auto do_for_shard = [&] (size_t shard_index) + auto do_for_shard = [&] (UInt64 shard_index) { const Cluster::ShardInfo & shard = cluster->getShardsInfo().at(shard_index); - size_t & num_successful_executions = per_shard_num_successful_replicas.at(shard_index); + UInt64 & num_successful_executions = per_shard_num_successful_replicas.at(shard_index); num_successful_executions = 0; auto increment_and_check_exit = [&] () @@ -1951,12 +1951,12 @@ protected: return max_successful_executions_per_shard && num_successful_executions >= max_successful_executions_per_shard; }; - size_t num_replicas = cluster->getShardsAddresses().at(shard_index).size(); - size_t num_local_replicas = shard.getLocalNodeCount(); - size_t num_remote_replicas = num_replicas - num_local_replicas; + UInt64 num_replicas = cluster->getShardsAddresses().at(shard_index).size(); + UInt64 num_local_replicas = shard.getLocalNodeCount(); + UInt64 num_remote_replicas = num_replicas - num_local_replicas; /// In that case we don't have local replicas, but do it just in case - for (size_t i = 0; i < num_local_replicas; ++i) + for (UInt64 i = 0; i < num_local_replicas; ++i) { auto interpreter = InterpreterFactory::get(query_ast, context); interpreter->execute(); @@ -1997,16 +1997,16 @@ protected: }; { - ThreadPool thread_pool(std::min(num_shards, getNumberOfPhysicalCPUCores())); + ThreadPool thread_pool(std::min(num_shards, UInt64(getNumberOfPhysicalCPUCores()))); - for (size_t shard_index = 0; shard_index < num_shards; ++shard_index) + for (UInt64 shard_index = 0; shard_index < num_shards; ++shard_index) thread_pool.schedule([=] { do_for_shard(shard_index); }); thread_pool.wait(); } - size_t successful_shards = 0; - for (size_t num_replicas : per_shard_num_successful_replicas) + UInt64 successful_shards = 0; + for (UInt64 num_replicas : 
per_shard_num_successful_replicas) successful_shards += (num_replicas > 0); return successful_shards; diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 0d9946813b9..6edb0de82b3 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -123,7 +123,7 @@ UInt64 hash(Ts... xs) UInt64 maskBits(UInt64 x, size_t num_bits) { - return x & ((1 << num_bits) - 1); + return x & ((1ULL << num_bits) - 1); } @@ -149,7 +149,7 @@ UInt64 feistelNetwork(UInt64 x, size_t num_bits, UInt64 seed, size_t num_rounds UInt64 bits = maskBits(x, num_bits); for (size_t i = 0; i < num_rounds; ++i) bits = feistelRound(bits, num_bits, seed, i); - return (x & ~((1 << num_bits) - 1)) ^ bits; + return (x & ~((1ULL << num_bits) - 1)) ^ bits; } diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 47aa2b91b17..9626a54aa20 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1039,8 +1039,8 @@ void ZooKeeper::sendThread() { /// Wait for the next request in queue. No more than operation timeout. No more than until next heartbeat time. UInt64 max_wait = std::min( - std::chrono::duration_cast(next_heartbeat_time - now).count(), - operation_timeout.totalMilliseconds()); + UInt64(std::chrono::duration_cast(next_heartbeat_time - now).count()), + UInt64(operation_timeout.totalMilliseconds())); RequestInfo info; if (requests_queue.tryPop(info, max_wait)) diff --git a/dbms/src/Compression/CompressionCodecZSTD.cpp b/dbms/src/Compression/CompressionCodecZSTD.cpp index d3f96cc7e06..8c79b4439ac 100644 --- a/dbms/src/Compression/CompressionCodecZSTD.cpp +++ b/dbms/src/Compression/CompressionCodecZSTD.cpp @@ -63,7 +63,7 @@ CompressionCodecZSTD::CompressionCodecZSTD(int level_) void registerCodecZSTD(CompressionCodecFactory & factory) { - UInt8 method_code = static_cast(CompressionMethodByte::ZSTD); + UInt8 method_code = UInt8(CompressionMethodByte::ZSTD); factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr { int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL; diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 0dfd07fc6b6..12ad34b6433 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -78,7 +78,7 @@ Block MergeSortingBlockInputStream::readImpl() if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort) { Poco::File(tmp_path).createDirectories(); - temporary_files.emplace_back(new Poco::TemporaryFile(tmp_path)); + temporary_files.emplace_back(std::make_unique(tmp_path)); const std::string & path = temporary_files.back()->path(); WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf); diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.h b/dbms/src/Dictionaries/Embedded/RegionsNames.h index 074a41162f1..7acb23d001e 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsNames.h +++ b/dbms/src/Dictionaries/Embedded/RegionsNames.h @@ -73,7 +73,7 @@ public: { size_t language_id = static_cast(language); - if (region_id > names_refs[language_id].size()) + if (region_id >= names_refs[language_id].size()) return StringRef("", 0); StringRef ref = names_refs[language_id][region_id]; diff --git a/dbms/src/Functions/hasColumnInTable.cpp b/dbms/src/Functions/hasColumnInTable.cpp index 
1039cd1b70b..9c8017497e3 100644 --- a/dbms/src/Functions/hasColumnInTable.cpp +++ b/dbms/src/Functions/hasColumnInTable.cpp @@ -132,7 +132,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & has_column = remote_columns.hasPhysical(column_name); } - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, has_column); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, Field(has_column)); } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index e9d4e3f1440..1af0210bc27 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -99,7 +99,6 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared(query.database, query.table, table, context, query_ptr, query.no_destination); - /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote())) diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index feb351180c7..fc472ad8a9e 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace DB @@ -289,7 +290,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) if (replica_names.empty()) return; - ThreadPool pool(std::min(getNumberOfPhysicalCPUCores(), replica_names.size())); + ThreadPool pool(std::min(size_t(getNumberOfPhysicalCPUCores()), replica_names.size())); for (auto & table : replica_names) pool.schedule([&] () { tryRestartReplica(table.first, table.second, system_context); }); pool.wait(); diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index d59fc811338..28578ac19dd 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -203,10 +203,9 @@ void MutationsInterpreter::prepare(bool dry_run) } } } - } - if (!updated_columns.empty()) validateUpdateColumns(storage, updated_columns, column_to_affected_materialized); + } /// First, break a sequence of commands into stages. 
stages.emplace_back(context); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index af84eac7f91..408b827adae 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -14,6 +14,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_ELEMENT_IN_AST; +} + static constexpr auto and_function_name = "and"; PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( @@ -400,6 +406,8 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que DatabaseAndTableWithAlias database_and_table_name(*database_and_table_ast); storage = context.getTable(database_and_table_name.database, database_and_table_name.table); } + else + throw Exception("Logical error: unexpected table expression", ErrorCodes::LOGICAL_ERROR); const auto block = storage->getSampleBlock(); for (size_t idx = 0; idx < block.columns(); idx++) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index e999489475c..65148e0682a 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -14,12 +14,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; -} - using PredicateExpressions = std::vector; using ProjectionWithAlias = std::pair; using ProjectionsWithAliases = std::vector; diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp index 6ee31d0eac1..a3e1cad8d12 100644 --- a/dbms/src/Interpreters/tests/hash_map.cpp +++ b/dbms/src/Interpreters/tests/hash_map.cpp @@ -107,13 +107,13 @@ int main(int argc, char ** argv) AggregateFunctionPtr func_avg = factory.get("avg", data_types_uint64); AggregateFunctionPtr func_uniq = factory.get("uniq", data_types_uint64); - #define INIT \ - { \ - value.resize(3); \ - \ - value[0] = func_count.get();\ - value[1] = func_avg.get(); \ - value[2] = func_uniq.get(); \ + #define INIT \ + { \ + value.resize(3); \ + \ + value[0] = func_count.get(); \ + value[1] = func_avg.get(); \ + value[2] = func_uniq.get(); \ } INIT @@ -162,7 +162,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(&it->second) Value(std::move(value)); + new(&it->second) Value; + std::swap(it->second, value); INIT } } @@ -192,7 +193,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(&it->second) Value(std::move(value)); + new(&it->second) Value; + std::swap(it->second, value); INIT } } @@ -223,7 +225,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(&it->second) Value(std::move(value)); + new(&it->second) Value; + std::swap(it->second, value); INIT } } @@ -248,7 +251,7 @@ int main(int argc, char ** argv) std::unordered_map>::iterator it; for (size_t i = 0; i < n; ++i) { - it = map.insert(std::make_pair(data[i], std::move(value))).first; + it = map.insert(std::make_pair(data[i], value)).first; INIT } @@ -269,7 +272,7 @@ int main(int argc, char ** argv) map.set_empty_key(-1ULL); for (size_t i = 0; i < n; ++i) { - it = map.insert(std::make_pair(data[i], std::move(value))).first; + it = map.insert(std::make_pair(data[i], value)).first; INIT } @@ -289,7 +292,7 @@ int main(int argc, char ** argv) 
GOOGLE_NAMESPACE::sparse_hash_map>::iterator it; for (size_t i = 0; i < n; ++i) { - map.insert(std::make_pair(data[i], std::move(value))); + map.insert(std::make_pair(data[i], value)); INIT } diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index a3dd993e8c6..e6ccf544ba1 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -304,7 +304,7 @@ BlockInputStreams StorageKafka::read( if (num_created_consumers == 0) return BlockInputStreams(); - const size_t stream_count = std::min(num_streams, num_created_consumers); + const size_t stream_count = std::min(size_t(num_streams), num_created_consumers); BlockInputStreams streams; streams.reserve(stream_count); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 119b0861fbc..bdcd28c562b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2269,7 +2269,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartS for (auto state : affordable_states) { - buf = std::move(res); + std::swap(buf, res); res.clear(); auto range = getDataPartsStateRange(state); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index b6c2bbd96ee..2f1ee2a2943 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -86,7 +86,6 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() /// We will keep logs after and including this threshold. UInt64 min_saved_log_pointer = std::numeric_limits::max(); - UInt64 min_log_pointer_lost_candidate = std::numeric_limits::max(); Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/log"); @@ -118,7 +117,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/host", &host_stat); String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer"); - UInt32 log_pointer = 0; + UInt64 log_pointer = 0; if (!pointer.empty()) log_pointer = parse(pointer); @@ -190,7 +189,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() for (const String & replica : recovering_replicas) { String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer"); - UInt32 log_pointer = 0; + UInt64 log_pointer = 0; if (!pointer.empty()) log_pointer = parse(pointer); min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d6275b94a0c..fcdce191169 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -648,7 +648,7 @@ ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsFo /// Let's find the action to merge this part with others. Let's remember others. 
StringSet parts_for_merge; - Queue::iterator merge_entry; + Queue::iterator merge_entry = queue.end(); for (Queue::iterator it = queue.begin(); it != queue.end(); ++it) { if ((*it)->type == LogEntry::MERGE_PARTS || (*it)->type == LogEntry::MUTATE_PART) diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 193ca0ebdbb..4aa8791708b 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -219,7 +220,7 @@ BlockInputStreams StorageMerge::read( size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count); size_t current_streams = std::min(current_need_streams, remaining_streams); remaining_streams -= current_streams; - current_streams = std::max(1, current_streams); + current_streams = std::max(size_t(1), current_streams); StoragePtr storage = it->first; TableStructureReadLockPtr struct_lock = it->second; diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.cpp b/dbms/src/TableFunctions/TableFunctionNumbers.cpp index 1970a757b2d..8226542d9ee 100644 --- a/dbms/src/TableFunctions/TableFunctionNumbers.cpp +++ b/dbms/src/TableFunctions/TableFunctionNumbers.cpp @@ -34,7 +34,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, const res->startup(); return res; } - throw new Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } void registerTableFunctionNumbers(TableFunctionFactory & factory) diff --git a/libs/libcommon/include/common/Types.h b/libs/libcommon/include/common/Types.h index a6bfcc6ae31..d2fdb0a8343 100644 --- a/libs/libcommon/include/common/Types.h +++ b/libs/libcommon/include/common/Types.h @@ -12,27 +12,6 @@ using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; - -/** This is not the best way to overcome an issue of different definitions - * of uint64_t and size_t on Linux and Mac OS X (both 64 bit). - * - * Note that on both platforms, long and long long are 64 bit types. - * But they are always different types (with the same physical representation). - */ -namespace std -{ - inline UInt64 max(unsigned long x, unsigned long long y) { return x > y ? x : y; } - inline UInt64 max(unsigned long long x, unsigned long y) { return x > y ? x : y; } - inline UInt64 min(unsigned long x, unsigned long long y) { return x < y ? x : y; } - inline UInt64 min(unsigned long long x, unsigned long y) { return x < y ? x : y; } - - inline Int64 max(long x, long long y) { return x > y ? x : y; } - inline Int64 max(long long x, long y) { return x > y ? x : y; } - inline Int64 min(long x, long long y) { return x < y ? x : y; } - inline Int64 min(long long x, long y) { return x < y ? x : y; } -} - - /// Workaround for the issue, that KDevelop doesn't see time_t and size_t types (for syntax highlight). 
#ifdef IN_KDEVELOP_PARSER using time_t = Int64; diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index 86f15cd7a53..ed19600c870 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries (local_date_time_comparison common) add_check(multi_version) add_check(local_date_time_comparison) -add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp) +add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp gtest_max.cpp) target_link_libraries (unit_tests_libcommon common ${GTEST_MAIN_LIBRARIES}) add_check(unit_tests_libcommon) diff --git a/utils/compressor/mutator.cpp b/utils/compressor/mutator.cpp index c8cca3e6ecf..65125d073d0 100644 --- a/utils/compressor/mutator.cpp +++ b/utils/compressor/mutator.cpp @@ -106,7 +106,7 @@ static void mutate(pcg64 & generator, void * src, size_t length) && isAlphaASCII(pos[2])) { auto res = rand(generator, 0, 3); - if (res == 2) + if (res == 2) { std::swap(pos[0], pos[1]); } @@ -118,7 +118,7 @@ static void mutate(pcg64 & generator, void * src, size_t length) else if (pos + 5 <= end && pos[0] >= 0xC0 && pos[0] <= 0xDF && pos[1] >= 0x80 && pos[1] <= 0xBF && pos[2] >= 0x20 && pos[2] < 0x80 && !isAlphaASCII(pos[2]) - && pos[3] >= 0xC0 && pos[0] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF) + && pos[3] >= 0xC0 && pos[3] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF) { auto res = rand(generator, 0, 3); if (res == 2) From 43bd57eaf10f541db67543af9476c385ceab81bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 18:47:25 +0300 Subject: [PATCH 08/24] Addition to prev. revision --- libs/libcommon/src/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index ed19600c870..86f15cd7a53 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries (local_date_time_comparison common) add_check(multi_version) add_check(local_date_time_comparison) -add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp gtest_max.cpp) +add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp) target_link_libraries (unit_tests_libcommon common ${GTEST_MAIN_LIBRARIES}) add_check(unit_tests_libcommon) From 95a9b8b3f73596f0adbc6d4f530592fbc45e7fcc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 19:04:39 +0300 Subject: [PATCH 09/24] CLICKHOUSE-4245: Turn on query_log in stateless and stress tests --- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/log_queries.xml | 7 +++++++ docker/test/stress/Dockerfile | 3 ++- docker/test/stress/log_queries.xml | 7 +++++++ 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 docker/test/stateless/log_queries.xml create mode 100644 docker/test/stress/log_queries.xml diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 4bdad6aa02c..bc81c298553 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,6 +25,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY zookeeper.xml /etc/clickhouse-server/config.d/zookeeper.xml COPY listen.xml /etc/clickhouse-server/config.d/listen.xml +COPY log_queries.xml 
/etc/clickhouse-server/users.d/log_queries.xml CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ diff --git a/docker/test/stateless/log_queries.xml b/docker/test/stateless/log_queries.xml new file mode 100644 index 00000000000..25261072ade --- /dev/null +++ b/docker/test/stateless/log_queries.xml @@ -0,0 +1,7 @@ + + + + 1 + + + diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 7987e042273..80101688118 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:18.04 +FROM ubuntu:18.10 RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ @@ -20,6 +20,7 @@ RUN apt-get update -y \ telnet COPY ./stress /stress +COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ diff --git a/docker/test/stress/log_queries.xml b/docker/test/stress/log_queries.xml new file mode 100644 index 00000000000..25261072ade --- /dev/null +++ b/docker/test/stress/log_queries.xml @@ -0,0 +1,7 @@ + + + + 1 + + + From 4676009c8e2e1fcdff5df3aef834f217c7462fc4 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 9 Jan 2019 19:13:03 +0300 Subject: [PATCH 10/24] PyYAML==4.2b1 --- docs/tools/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index e8680473939..85cd355dbdc 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -18,7 +18,7 @@ mkdocs==1.0.4 Pygments==2.2.0 python-slugify==1.2.6 pytz==2017.3 -PyYAML==3.12 +PyYAML==4.2b1 recommonmark==0.4.0 requests==2.21.0 singledispatch==3.4.0.3 From 729ca697d9453bfd8345e817c91386367fcdd424 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 9 Jan 2019 19:16:59 +0300 Subject: [PATCH 11/24] minor SyntaxAnalyzer refactoring --- dbms/src/Interpreters/AnalyzedJoin.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 10 +- .../Interpreters/InterpreterCreateQuery.cpp | 2 +- .../Interpreters/InterpreterSelectQuery.cpp | 8 +- .../src/Interpreters/MutationsInterpreter.cpp | 4 +- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 269 +++++++----------- dbms/src/Interpreters/SyntaxAnalyzer.h | 10 +- .../evaluateConstantExpression.cpp | 2 +- .../Interpreters/evaluateMissingDefaults.cpp | 4 +- dbms/src/Storages/AlterCommands.cpp | 4 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 10 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +- dbms/src/Storages/StorageDistributed.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 2 +- dbms/src/Storages/VirtualColumnUtils.cpp | 2 +- .../transformQueryForExternalDatabase.cpp | 4 +- 16 files changed, 145 insertions(+), 194 deletions(-) diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index 83dfebcd852..c39ea9c9495 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -48,7 +48,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( source_column_names.emplace_back(column.name_and_type); ASTPtr query = expression_list; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, source_column_names, required_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns); ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns); auto joined_block_actions = analyzer.getActions(false); diff --git 
a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 59a8c084b9e..dcf2f0b051d 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -37,7 +37,7 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String return res; } -bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { /// Processed if (typeid_cast(node.get()) || @@ -48,6 +48,14 @@ bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr if (typeid_cast(node.get())) return false; + if (typeid_cast(node.get())) + { + /// Do not go to FROM, JOIN, UNION. + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + } + return true; } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 3bce5755a6c..8cc3d1b88c1 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -242,7 +242,7 @@ static ColumnsDeclarationAndModifiers parseColumns(const ASTExpressionList & col /// set missing types and wrap default_expression's in a conversion-function if necessary if (!defaulted_columns.empty()) { - auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, columns); + auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, columns); const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); const auto block = actions->getSampleBlock(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 0a9cb78d5f7..991f31afcdc 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -184,8 +184,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) table_lock = storage->lockStructure(false); - syntax_analyzer_result = SyntaxAnalyzer(context, storage) - .analyze(query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth); + syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( + query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); @@ -792,7 +792,7 @@ void InterpreterSelectQuery::executeFetchColumns( } auto additional_source_columns_set = ext::map(additional_source_columns, [] (const auto & it) { return it.name; }); - auto syntax_result = SyntaxAnalyzer(context, storage).analyze(required_columns_expr_list, additional_source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(required_columns_expr_list, additional_source_columns, {}, storage); alias_actions = ExpressionAnalyzer(required_columns_expr_list, syntax_result, context).getActions(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
@@ -829,7 +829,7 @@ void InterpreterSelectQuery::executeFetchColumns( } prewhere_info->prewhere_actions = std::move(new_actions); - auto analyzed_result = SyntaxAnalyzer(context, {}).analyze(required_prewhere_columns_expr_list, storage->getColumns().getAllPhysical()); + auto analyzed_result = SyntaxAnalyzer(context).analyze(required_prewhere_columns_expr_list, storage->getColumns().getAllPhysical()); prewhere_info->alias_actions = ExpressionAnalyzer(required_prewhere_columns_expr_list, analyzed_result, context) .getActions(true, false); diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index d59fc811338..fbf64f081f7 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -194,7 +194,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (col_default.kind == ColumnDefaultKind::Materialized) { auto query = col_default.expression->clone(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); ExpressionAnalyzer analyzer(query, syntax_result, context); for (const String & dependency : analyzer.getRequiredSourceColumns()) { @@ -301,7 +301,7 @@ void MutationsInterpreter::prepare(bool dry_run) for (const String & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(all_asts, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(all_asts, all_columns); stage.analyzer = std::make_unique(all_asts, syntax_result, context); ExpressionActionsChain & actions_chain = stage.expressions_chain; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 3d9a7f55df3..4bce5ce7777 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -42,141 +42,6 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; } -namespace -{ - -using LogAST = DebugASTLog; /// set to true to enable logs -using Aliases = SyntaxAnalyzerResult::Aliases; - -/// Add columns from storage to source_columns list. -void collectSourceColumns(ASTSelectQuery * select_query, const Context & context, - StoragePtr & storage, NamesAndTypesList & source_columns); - -/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. -void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context); - -/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases. -void normalizeTree( - ASTPtr & query, - SyntaxAnalyzerResult & result, - const Names & source_columns, - const NameSet & source_columns_set, - const StoragePtr & storage, - const Context & context, - const ASTSelectQuery * select_query, - bool asterisk_left_columns_only); - -/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. -/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. -void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns); - -/// Replacing scalar subqueries with constant values. 
-void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query, - const Context & context, size_t subquery_depth); - -/// Remove Function_if AST if condition is constant. -void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases); - -/// Eliminates injective function calls and constant expressions from group by statement. -void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context); - -/// Remove duplicate items from ORDER BY. -void optimizeOrderBy(const ASTSelectQuery * select_query); - -/// Remove duplicate items from LIMIT BY. -void optimizeLimitBy(const ASTSelectQuery * select_query); - -/// Remove duplicated columns from USING(...). -void optimizeUsing(const ASTSelectQuery * select_query); - -void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query, - const Names & source_columns, const NameSet & source_columns_set); - -/// Parse JOIN ON expression and collect ASTs for joined columns. -void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context); - -/// Find the columns that are obtained by JOIN. -void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context); -} - -SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( - ASTPtr & query, - const NamesAndTypesList & source_columns_, - const Names & required_result_columns, - size_t subquery_depth) const -{ - SyntaxAnalyzerResult result; - result.storage = storage; - result.source_columns = source_columns_; - auto * select_query = typeid_cast(query.get()); - collectSourceColumns(select_query, context, result.storage, result.source_columns); - - const auto & settings = context.getSettingsRef(); - - Names source_columns_list; - source_columns_list.reserve(result.source_columns.size()); - for (const auto & type_name : result.source_columns) - source_columns_list.emplace_back(type_name.name); - NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); - - translateQualifiedNames(query, select_query, source_columns_set, context); - - /// Depending on the user's profile, check for the execution rights - /// distributed subqueries inside the IN or JOIN sections and process these subqueries. - InJoinSubqueriesPreprocessor(context).process(select_query); - - /// Optimizes logical expressions. - LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); - - /// Creates a dictionary `aliases`: alias -> ASTPtr - { - LogAST log; - QueryAliasesVisitor::Data query_aliases_data{result.aliases}; - QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); - } - - /// Common subexpression elimination. Rewrite rules. - normalizeTree(query, result, source_columns_list, source_columns_set, result.storage, - context, select_query, settings.asterisk_left_columns_only != 0); - - /// Remove unneeded columns according to 'required_result_columns'. - /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. - /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) - /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. 
- removeUnneededColumnsFromSelectClause(select_query, required_result_columns); - - /// Executing scalar subqueries - replacing them with constant values. - executeScalarSubqueries(query, select_query, context, subquery_depth); - - /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. - optimizeIfWithConstantCondition(query, result.aliases); - - /// GROUP BY injective function elimination. - optimizeGroupBy(select_query, source_columns_set, context); - - /// Remove duplicate items from ORDER BY. - optimizeOrderBy(select_query); - - // Remove duplicated elements from LIMIT BY clause. - optimizeLimitBy(select_query); - - /// Remove duplicated columns from USING(...). - optimizeUsing(select_query); - - /// array_join_alias_to_name, array_join_result_to_source. - getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); - - /// Push the predicate expression down to the subqueries. - result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); - - collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); - - return std::make_shared(result); -} - void removeDuplicateColumns(NamesAndTypesList & columns) { std::set names; @@ -192,15 +57,12 @@ void removeDuplicateColumns(NamesAndTypesList & columns) namespace { -void collectSourceColumns(ASTSelectQuery * select_query, const Context & context, - StoragePtr & storage, NamesAndTypesList & source_columns) -{ - if (!storage && select_query) - { - if (auto db_and_table = getDatabaseAndTable(*select_query, 0)) - storage = context.tryGetTable(db_and_table->database, db_and_table->table); - } +using LogAST = DebugASTLog; /// set to true to enable logs + +/// Add columns from storage to source_columns list. +void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, NamesAndTypesList & source_columns) +{ if (storage) { auto physical_columns = storage->getColumns().getAllPhysical(); @@ -219,6 +81,7 @@ void collectSourceColumns(ASTSelectQuery * select_query, const Context & context removeDuplicateColumns(source_columns); } +/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { @@ -233,6 +96,7 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, visitor.visit(query); } +/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases. void normalizeTree( ASTPtr & query, SyntaxAnalyzerResult & result, @@ -297,6 +161,8 @@ bool hasArrayJoin(const ASTPtr & ast) return false; } +/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. +/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns) { if (!select_query) @@ -335,29 +201,12 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, elements = std::move(new_elements); } -void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query, - const Context & context, size_t subquery_depth) +/// Replacing scalar subqueries with constant values. 
+void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t subquery_depth) { LogAST log; - - if (!select_query) - { - ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; - ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); - } - else - { - for (auto & child : query->children) - { - /// Do not go to FROM, JOIN, UNION. - if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) - { - ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; - ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(child); - } - } - } + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) @@ -394,7 +243,8 @@ bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) return false; } -void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases) +/// Remove Function_if AST if condition is constant. +void optimizeIfWithConstantCondition(ASTPtr & current_ast, SyntaxAnalyzerResult::Aliases & aliases) { if (!current_ast) return; @@ -491,6 +341,7 @@ const std::unordered_set possibly_injective_function_names "dictGetDateTime" }; +/// Eliminates injective function calls and constant expressions from group by statement. void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { if (!(select_query && select_query->group_expression_list)) @@ -594,6 +445,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum } } +/// Remove duplicate items from ORDER BY. void optimizeOrderBy(const ASTSelectQuery * select_query) { if (!(select_query && select_query->order_expression_list)) @@ -620,6 +472,7 @@ void optimizeOrderBy(const ASTSelectQuery * select_query) elems = unique_elems; } +/// Remove duplicate items from LIMIT BY. void optimizeLimitBy(const ASTSelectQuery * select_query) { if (!(select_query && select_query->limit_by_expression_list)) @@ -641,6 +494,7 @@ void optimizeLimitBy(const ASTSelectQuery * select_query) elems = unique_elems; } +/// Remove duplicated columns from USING(...). void optimizeUsing(const ASTSelectQuery * select_query) { if (!select_query) @@ -740,6 +594,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const } } +/// Parse JOIN ON expression and collect ASTs for joined columns. void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { @@ -899,6 +754,7 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS add_columns_from_equals_expr(table_join.on_expression); } +/// Find the columns that are obtained by JOIN. 
void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { @@ -969,4 +825,87 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s } + +SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( + ASTPtr & query, + const NamesAndTypesList & source_columns_, + const Names & required_result_columns, + StoragePtr storage) const +{ + auto * select_query = typeid_cast(query.get()); + if (!storage && select_query) + { + if (auto db_and_table = getDatabaseAndTable(*select_query, 0)) + storage = context.tryGetTable(db_and_table->database, db_and_table->table); + } + + SyntaxAnalyzerResult result; + result.storage = storage; + result.source_columns = source_columns_; + + collectSourceColumns(select_query, result.storage, result.source_columns); + + const auto & settings = context.getSettingsRef(); + + Names source_columns_list; + source_columns_list.reserve(result.source_columns.size()); + for (const auto & type_name : result.source_columns) + source_columns_list.emplace_back(type_name.name); + NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); + + translateQualifiedNames(query, select_query, source_columns_set, context); + + /// Depending on the user's profile, check for the execution rights + /// distributed subqueries inside the IN or JOIN sections and process these subqueries. + InJoinSubqueriesPreprocessor(context).process(select_query); + + /// Optimizes logical expressions. + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + + /// Creates a dictionary `aliases`: alias -> ASTPtr + { + LogAST log; + QueryAliasesVisitor::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); + } + + /// Common subexpression elimination. Rewrite rules. + normalizeTree(query, result, source_columns_list, source_columns_set, result.storage, + context, select_query, settings.asterisk_left_columns_only != 0); + + /// Remove unneeded columns according to 'required_result_columns'. + /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. + /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) + /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. + removeUnneededColumnsFromSelectClause(select_query, required_result_columns); + + /// Executing scalar subqueries - replacing them with constant values. + executeScalarSubqueries(query, context, subquery_depth); + + /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. + optimizeIfWithConstantCondition(query, result.aliases); + + /// GROUP BY injective function elimination. + optimizeGroupBy(select_query, source_columns_set, context); + + /// Remove duplicate items from ORDER BY. + optimizeOrderBy(select_query); + + /// Remove duplicated elements from LIMIT BY clause. + optimizeLimitBy(select_query); + + /// Remove duplicated columns from USING(...). + optimizeUsing(select_query); + + /// array_join_alias_to_name, array_join_result_to_source. + getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); + + /// Push the predicate expression down to the subqueries. 
+ result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); + + collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); + + return std::make_shared(result); +} + } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 38595917917..5500823b3c2 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -54,16 +54,20 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr; class SyntaxAnalyzer { public: - SyntaxAnalyzer(const Context & context, StoragePtr storage) : context(context), storage(std::move(storage)) {} + SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0) + : context(context_) + , subquery_depth(subquery_depth_) + {} SyntaxAnalyzerResultPtr analyze( ASTPtr & query, const NamesAndTypesList & source_columns_, const Names & required_result_columns = {}, - size_t subquery_depth = 0) const; + StoragePtr storage = {}) const; +private: const Context & context; - StoragePtr storage; + size_t subquery_depth; }; } diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 29753a4c637..8f96160186d 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -31,7 +31,7 @@ std::pair> evaluateConstantExpression(co { NamesAndTypesList source_columns = {{ "_dummy", std::make_shared() }}; auto ast = node->clone(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns); ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions(); /// There must be at least one column in the block so that it knows the number of rows. 
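Every call site touched by this patch follows the same mechanical migration that the SyntaxAnalyzer.h hunk above defines: storage moves out of the constructor into analyze(), and subquery_depth moves out of analyze() into the constructor. A sketch of the two shapes, assuming a caller that already holds a query AST and a source column list:

    /// Old shape, removed by this patch:
    auto old_result = SyntaxAnalyzer(context, storage)
        .analyze(query, source_columns, required_result_columns, subquery_depth);

    /// New shape: depth is fixed per analyzer, storage is per query and optional.
    auto new_result = SyntaxAnalyzer(context, subquery_depth)
        .analyze(query, source_columns, required_result_columns, storage);

    /// Most call sites need neither, which is why the common form shrinks to:
    auto result = SyntaxAnalyzer(context).analyze(query, source_columns);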
diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 33dce42ab8e..9a6884b25e3 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -48,7 +48,7 @@ void evaluateMissingDefaults(Block & block, if (!save_unneeded_columns) { - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); + auto syntax_result = SyntaxAnalyzer(context).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); return; } @@ -57,7 +57,7 @@ void evaluateMissingDefaults(Block & block, * we are going to operate on a copy instead of the original block */ Block copy_block{block}; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); + auto syntax_result = SyntaxAnalyzer(context).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block); /// move evaluated columns to the original block, materializing them at the same time diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 332ccfde3f0..b5fbe0f3314 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -398,7 +398,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) { const auto & default_expression = default_column.second.expression; ASTPtr query = default_expression; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); const auto actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true); const auto required_columns = actions->getRequiredColumns(); @@ -473,7 +473,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } ASTPtr query = default_expr_list; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); const auto actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true); const auto block = actions->getSampleBlock(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 119b0861fbc..30c38282d76 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -126,7 +126,7 @@ MergeTreeData::MergeTreeData( && !attach && !settings.compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. 
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - auto syntax = SyntaxAnalyzer(global_context, {}).analyze(sample_by_ast, getColumns().getAllPhysical()); + auto syntax = SyntaxAnalyzer(global_context).analyze(sample_by_ast, getColumns().getAllPhysical()); columns_required_for_sampling = ExpressionAnalyzer(sample_by_ast, syntax, global_context) .getRequiredSourceColumns(); } @@ -282,7 +282,7 @@ void MergeTreeData::setPrimaryKeyAndColumns( if (!added_key_column_expr_list->children.empty()) { - auto syntax = SyntaxAnalyzer(global_context, {}).analyze(added_key_column_expr_list, all_columns); + auto syntax = SyntaxAnalyzer(global_context).analyze(added_key_column_expr_list, all_columns); Names used_columns = ExpressionAnalyzer(added_key_column_expr_list, syntax, global_context) .getRequiredSourceColumns(); @@ -305,7 +305,7 @@ void MergeTreeData::setPrimaryKeyAndColumns( } } - auto new_sorting_key_syntax = SyntaxAnalyzer(global_context, {}).analyze(new_sorting_key_expr_list, all_columns); + auto new_sorting_key_syntax = SyntaxAnalyzer(global_context).analyze(new_sorting_key_expr_list, all_columns); auto new_sorting_key_expr = ExpressionAnalyzer(new_sorting_key_expr_list, new_sorting_key_syntax, global_context) .getActions(false); auto new_sorting_key_sample = @@ -314,7 +314,7 @@ void MergeTreeData::setPrimaryKeyAndColumns( checkKeyExpression(*new_sorting_key_expr, new_sorting_key_sample, "Sorting"); - auto new_primary_key_syntax = SyntaxAnalyzer(global_context, {}).analyze(new_primary_key_expr_list, all_columns); + auto new_primary_key_syntax = SyntaxAnalyzer(global_context).analyze(new_primary_key_expr_list, all_columns); auto new_primary_key_expr = ExpressionAnalyzer(new_primary_key_expr_list, new_primary_key_syntax, global_context) .getActions(false); @@ -376,7 +376,7 @@ void MergeTreeData::initPartitionKey() return; { - auto syntax_result = SyntaxAnalyzer(global_context, {}).analyze(partition_key_expr_list, getColumns().getAllPhysical()); + auto syntax_result = SyntaxAnalyzer(global_context).analyze(partition_key_expr_list, getColumns().getAllPhysical()); partition_key_expr = ExpressionAnalyzer(partition_key_expr_list, syntax_result, global_context).getActions(false); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index dd5a35ad710..01fb3169013 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -488,7 +488,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( } ASTPtr query = filter_function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, available_real_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, available_real_columns); filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false); /// Add columns needed for `sample_by_ast` to `column_names_to_read`. 
@@ -848,7 +848,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( arguments->children.push_back(one); ASTPtr query = function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical()); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, data.getColumns().getAllPhysical()); out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false); out_column = function->getColumnName(); } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index e1baea8f9eb..8f4f31d458c 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -170,7 +170,7 @@ StorageDistributed::~StorageDistributed() = default; static ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, const Context & context, NamesAndTypesList columns, bool project) { ASTPtr query = sharding_key; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, columns); return ExpressionAnalyzer(query, syntax_result, context).getActions(project); } diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 193ca0ebdbb..931105df1ef 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -452,7 +452,7 @@ void StorageMerge::convertingSourceStream(const Block & header, const Context & NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList(); NameAndTypePair virtual_column = getColumn("_table"); source_columns.insert(source_columns.end(), virtual_column); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(where_expression, source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns); ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false); Names required_columns = actions->getRequiredColumns(); diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index d78a7a36727..6ce3e58cc75 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c return; /// Let's analyze and calculate the expression. 
- auto syntax_result = SyntaxAnalyzer(context, {}).analyze(expression_ast, block.getNamesAndTypesList()); + auto syntax_result = SyntaxAnalyzer(context).analyze(expression_ast, block.getNamesAndTypesList()); ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); ExpressionActionsPtr actions = analyzer.getActions(false); diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index f37e51b714e..0131d9f2162 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -28,7 +28,7 @@ static void replaceConstFunction(IAST & node, const Context & context, const Nam { NamesAndTypesList source_columns = all_columns; ASTPtr query = function->ptr(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_columns); auto result_block = KeyCondition::getBlockWithConstants(query, syntax_result, context); if (!result_block.has(child->getColumnName())) return; @@ -92,7 +92,7 @@ String transformQueryForExternalDatabase( const Context & context) { auto clone_query = query.clone(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(clone_query, available_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(clone_query, available_columns); ExpressionAnalyzer analyzer(clone_query, syntax_result, context); const Names & used_columns = analyzer.getRequiredSourceColumns(); From 99330c77adb3de1a82ebab48643f2e613047859a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 19:23:20 +0300 Subject: [PATCH 12/24] Enable part_log in docker test images by default --- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/part_log.xml | 8 ++++++++ docker/test/stress/Dockerfile | 1 + docker/test/stress/part_log.xml | 8 ++++++++ 4 files changed, 18 insertions(+) create mode 100644 docker/test/stateless/part_log.xml create mode 100644 docker/test/stress/part_log.xml diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index bc81c298553..90b4068fb93 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,6 +25,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY zookeeper.xml /etc/clickhouse-server/config.d/zookeeper.xml COPY listen.xml /etc/clickhouse-server/config.d/listen.xml +COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ diff --git a/docker/test/stateless/part_log.xml b/docker/test/stateless/part_log.xml new file mode 100644 index 00000000000..6c6fc9c6982 --- /dev/null +++ b/docker/test/stateless/part_log.xml @@ -0,0 +1,8 @@
+<yandex>
+    <part_log>
+        <database>system</database>
+        <table>part_log</table>
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </part_log>
+</yandex>
diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 80101688118..8f2524930c1 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -21,6 +21,7 @@ RUN apt-get update -y \ COPY ./stress /stress COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml +COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ diff --git a/docker/test/stress/part_log.xml b/docker/test/stress/part_log.xml new file mode 100644 index 00000000000..6c6fc9c6982 --- /dev/null +++ b/docker/test/stress/part_log.xml @@ -0,0 +1,8 @@
+<yandex>
+    <part_log>
+        <database>system</database>
+        <table>part_log</table>
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </part_log>
+</yandex>
From c93b54a88d80cd0d1b42ba0253cfbfa52cec4b47 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 19:32:34 +0300 Subject: [PATCH 13/24] Removed obsolete tweak, because KDevelop has migrated to clang parser long time ago --- libs/libcommon/include/common/Types.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/libs/libcommon/include/common/Types.h b/libs/libcommon/include/common/Types.h index d2fdb0a8343..70c9c3d2f3c 100644 --- a/libs/libcommon/include/common/Types.h +++ b/libs/libcommon/include/common/Types.h @@ -1,6 +1,5 @@ #pragma once #include -#include using Int8 = int8_t; using Int16 = int16_t; @@ -11,9 +10,3 @@ using UInt8 = uint8_t; using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; - -/// Workaround for the issue, that KDevelop doesn't see time_t and size_t types (for syntax highlight). -#ifdef IN_KDEVELOP_PARSER - using time_t = Int64; - using size_t = UInt64; -#endif From 7ba268049aca6eb21e327a5bb7de05ede90ffc2f Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 9 Jan 2019 20:06:40 +0300 Subject: [PATCH 14/24] extract OptimizeIfWithConstantConditionVisitor from SyntaxAnalyzer --- ...OptimizeIfWithConstantConditionVisitor.cpp | 108 ++++++++++++++++++ .../OptimizeIfWithConstantConditionVisitor.h | 27 +++++ dbms/src/Interpreters/SyntaxAnalyzer.cpp | 97 +--------------- 3 files changed, 137 insertions(+), 95 deletions(-) create mode 100644 dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp create mode 100644 dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h diff --git a/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp new file mode 100644 index 00000000000..e73a734ab16 --- /dev/null +++ b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) +{ + /// numeric constant in condition + if (const ASTLiteral * literal = typeid_cast(condition.get())) + { + if (literal->value.getType() == Field::Types::Int64 || + literal->value.getType() == Field::Types::UInt64) + { + value = literal->value.get(); + return true; + } + } + + /// cast of numeric constant in condition to UInt8 + if (const ASTFunction * function = typeid_cast(condition.get())) + { + if (function->name == "CAST") + { + if (ASTExpressionList * expr_list = typeid_cast(function->arguments.get())) + { + const ASTPtr & type_ast = expr_list->children.at(1); + if (const ASTLiteral * type_literal = typeid_cast(type_ast.get())) + { + if (type_literal->value.getType() == Field::Types::String && + type_literal->value.get() == "UInt8") + return tryExtractConstValueFromCondition(expr_list->children.at(0), value); + } + } + } + } + + return false; +} + +void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) +{ + if (!current_ast) + return; + + for (ASTPtr & child : current_ast->children) + { + auto * function_node = typeid_cast(child.get()); + if (!function_node || function_node->name != "if") + { + visit(child); + continue; + } + + visit(function_node->arguments); + auto * args = typeid_cast(function_node->arguments.get()); + + if (args->children.size() != 3) + throw Exception("Wrong number of arguments for function 'if' (" + toString(args->children.size()) + " instead of 3)", + 
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTPtr condition_expr = args->children[0]; + ASTPtr then_expr = args->children[1]; + ASTPtr else_expr = args->children[2]; + + bool condition; + if (tryExtractConstValueFromCondition(condition_expr, condition)) + { + ASTPtr replace_ast = condition ? then_expr : else_expr; + ASTPtr child_copy = child; + String replace_alias = replace_ast->tryGetAlias(); + String if_alias = child->tryGetAlias(); + + if (replace_alias.empty()) + { + replace_ast->setAlias(if_alias); + child = replace_ast; + } + else + { + /// Only copy of one node is required here. + /// But IAST has only method for deep copy of subtree. + /// This can be a reason of performance degradation in case of deep queries. + ASTPtr replace_ast_deep_copy = replace_ast->clone(); + replace_ast_deep_copy->setAlias(if_alias); + child = replace_ast_deep_copy; + } + + if (!if_alias.empty()) + { + auto alias_it = aliases.find(if_alias); + if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) + alias_it->second = child; + } + } + } +} + +} diff --git a/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h new file mode 100644 index 00000000000..ee738ec05e2 --- /dev/null +++ b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/// It removes Function_if node from AST if condition is constant. +/// TODO: rewrite with InDepthNodeVisitor +class OptimizeIfWithConstantConditionVisitor +{ +public: + using Aliases = std::unordered_map; + + OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + : aliases(aliases_) + {} + + void visit(ASTPtr & ast); + +private: + Aliases & aliases; +}; + +} diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 4bce5ce7777..cd156076b79 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ALIAS_REQUIRED; extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS; extern const int EMPTY_NESTED_TABLE; @@ -209,99 +209,6 @@ void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t sub ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } -bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) -{ - /// numeric constant in condition - if (const ASTLiteral * literal = typeid_cast(condition.get())) - { - if (literal->value.getType() == Field::Types::Int64 || - literal->value.getType() == Field::Types::UInt64) - { - value = literal->value.get(); - return true; - } - } - - /// cast of numeric constant in condition to UInt8 - if (const ASTFunction * function = typeid_cast(condition.get())) - { - if (function->name == "CAST") - { - if (ASTExpressionList * expr_list = typeid_cast(function->arguments.get())) - { - const ASTPtr & type_ast = expr_list->children.at(1); - if (const ASTLiteral * type_literal = typeid_cast(type_ast.get())) - { - if (type_literal->value.getType() == Field::Types::String && - type_literal->value.get() == "UInt8") - return tryExtractConstValueFromCondition(expr_list->children.at(0), value); - } - } - } - } - - return false; -} - -/// Remove Function_if AST if condition is constant. 
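/// The block removed below is exactly what moved into the new visitor above.
/// A usage sketch of the extracted class, matching the replacement call made
/// later in this patch (Aliases is the alias -> ASTPtr map that
/// QueryAliasesVisitor builds during analysis):
///
///     SyntaxAnalyzerResult result;
///     OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query);
///     /// e.g. the AST of "SELECT if(1, x, y) AS z" becomes "SELECT x AS z":
///     /// the constant condition selects the then-branch, and the alias of
///     /// the replaced if() node is preserved.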
-void optimizeIfWithConstantCondition(ASTPtr & current_ast, SyntaxAnalyzerResult::Aliases & aliases) -{ - if (!current_ast) - return; - - for (ASTPtr & child : current_ast->children) - { - auto * function_node = typeid_cast(child.get()); - if (!function_node || function_node->name != "if") - { - optimizeIfWithConstantCondition(child, aliases); - continue; - } - - optimizeIfWithConstantCondition(function_node->arguments, aliases); - auto * args = typeid_cast(function_node->arguments.get()); - - if (args->children.size() != 3) - throw Exception("Wrong number of arguments for function 'if' (" + toString(args->children.size()) + " instead of 3)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - ASTPtr condition_expr = args->children[0]; - ASTPtr then_expr = args->children[1]; - ASTPtr else_expr = args->children[2]; - - bool condition; - if (tryExtractConstValueFromCondition(condition_expr, condition)) - { - ASTPtr replace_ast = condition ? then_expr : else_expr; - ASTPtr child_copy = child; - String replace_alias = replace_ast->tryGetAlias(); - String if_alias = child->tryGetAlias(); - - if (replace_alias.empty()) - { - replace_ast->setAlias(if_alias); - child = replace_ast; - } - else - { - /// Only copy of one node is required here. - /// But IAST has only method for deep copy of subtree. - /// This can be a reason of performance degradation in case of deep queries. - ASTPtr replace_ast_deep_copy = replace_ast->clone(); - replace_ast_deep_copy->setAlias(if_alias); - child = replace_ast_deep_copy; - } - - if (!if_alias.empty()) - { - auto alias_it = aliases.find(if_alias); - if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) - alias_it->second = child; - } - } - } -} - /** Calls to these functions in the GROUP BY statement would be * replaced by their immediate argument. */ @@ -883,7 +790,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( executeScalarSubqueries(query, context, subquery_depth); /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. - optimizeIfWithConstantCondition(query, result.aliases); + OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query); /// GROUP BY injective function elimination. optimizeGroupBy(select_query, source_columns_set, context); From 4712fd668ea83e5d3f445aa58e7d490f358705a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 20:31:30 +0300 Subject: [PATCH 15/24] Addition to prev. 
revision --- dbms/src/IO/Progress.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/IO/Progress.h b/dbms/src/IO/Progress.h index c21196befac..7dca03f03c2 100644 --- a/dbms/src/IO/Progress.h +++ b/dbms/src/IO/Progress.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include From c460a270daf426f4be6bc37453ac18705aeb240b Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 9 Jan 2019 20:40:26 +0300 Subject: [PATCH 16/24] enlight when SyntaxAnalyzer affects only selects --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 67 +++++++++++------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index cd156076b79..364cf221f35 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -85,7 +85,7 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { - if (!select_query || !select_query->tables || select_query->tables->children.empty()) + if (!select_query->tables || select_query->tables->children.empty()) return; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); @@ -165,9 +165,6 @@ bool hasArrayJoin(const ASTPtr & ast) /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns) { - if (!select_query) - return; - if (required_result_columns.empty()) return; @@ -251,7 +248,7 @@ const std::unordered_set possibly_injective_function_names /// Eliminates injective function calls and constant expressions from group by statement. void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { - if (!(select_query && select_query->group_expression_list)) + if (!select_query->group_expression_list) return; const auto is_literal = [] (const ASTPtr & ast) @@ -355,7 +352,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum /// Remove duplicate items from ORDER BY. void optimizeOrderBy(const ASTSelectQuery * select_query) { - if (!(select_query && select_query->order_expression_list)) + if (!select_query->order_expression_list) return; /// Make unique sorting conditions. @@ -382,7 +379,7 @@ void optimizeOrderBy(const ASTSelectQuery * select_query) /// Remove duplicate items from LIMIT BY. void optimizeLimitBy(const ASTSelectQuery * select_query) { - if (!(select_query && select_query->limit_by_expression_list)) + if (!select_query->limit_by_expression_list) return; std::set elems_set; @@ -404,9 +401,6 @@ void optimizeLimitBy(const ASTSelectQuery * select_query) /// Remove duplicated columns from USING(...). 
void optimizeUsing(const ASTSelectQuery * select_query) { - if (!select_query) - return; - auto node = const_cast(select_query->join()); if (!node) return; @@ -437,9 +431,6 @@ void optimizeUsing(const ASTSelectQuery * select_query) void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query, const Names & source_columns, const NameSet & source_columns_set) { - if (!select_query) - return; - ASTPtr array_join_expression_list = select_query->array_join_expression_list(); if (array_join_expression_list) { @@ -665,9 +656,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { - if (!select_query) - return; - const ASTTablesInSelectQueryElement * node = select_query->join(); if (!node) @@ -760,14 +748,17 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( source_columns_list.emplace_back(type_name.name); NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); - translateQualifiedNames(query, select_query, source_columns_set, context); + if (select_query) + { + translateQualifiedNames(query, select_query, source_columns_set, context); - /// Depending on the user's profile, check for the execution rights - /// distributed subqueries inside the IN or JOIN sections and process these subqueries. - InJoinSubqueriesPreprocessor(context).process(select_query); + /// Depending on the user's profile, check for the execution rights + /// distributed subqueries inside the IN or JOIN sections and process these subqueries. + InJoinSubqueriesPreprocessor(context).process(select_query); - /// Optimizes logical expressions. - LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + /// Optimizes logical expressions. + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + } /// Creates a dictionary `aliases`: alias -> ASTPtr { @@ -784,7 +775,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. - removeUnneededColumnsFromSelectClause(select_query, required_result_columns); + if (select_query) + removeUnneededColumnsFromSelectClause(select_query, required_result_columns); /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(query, context, subquery_depth); @@ -792,25 +784,28 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query); - /// GROUP BY injective function elimination. - optimizeGroupBy(select_query, source_columns_set, context); + if (select_query) + { + /// GROUP BY injective function elimination. + optimizeGroupBy(select_query, source_columns_set, context); - /// Remove duplicate items from ORDER BY. - optimizeOrderBy(select_query); + /// Remove duplicate items from ORDER BY. + optimizeOrderBy(select_query); - /// Remove duplicated elements from LIMIT BY clause. 
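/// The guard consolidation in this hunk, in outline: every per-function
/// `if (!select_query) return;` escape above is deleted, and analyze()
/// instead runs all SELECT-only rewrites behind a single check, roughly:
///
///     if (select_query)
///     {
///         optimizeGroupBy(select_query, source_columns_set, context);
///         optimizeOrderBy(select_query);
///         optimizeLimitBy(select_query);
///         optimizeUsing(select_query);
///         getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set);
///         collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context);
///     }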
- optimizeLimitBy(select_query); + /// Remove duplicated elements from LIMIT BY clause. + optimizeLimitBy(select_query); - /// Remove duplicated columns from USING(...). - optimizeUsing(select_query); + /// Remove duplicated columns from USING(...). + optimizeUsing(select_query); - /// array_join_alias_to_name, array_join_result_to_source. - getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); + /// array_join_alias_to_name, array_join_result_to_source. + getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); - /// Push the predicate expression down to the subqueries. - result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); + /// Push the predicate expression down to the subqueries. + result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); - collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); + collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); + } return std::make_shared(result); } From 006a764df9a9f1456555c6902b238c2ddb04c684 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 9 Jan 2019 20:52:25 +0300 Subject: [PATCH 17/24] don't use pool for TaskStatsInfoGetter [#CLICKHOUSE-4209] Pool is not needed because creation of a TaskStatsInfoGetter takes an order of 10us. Also pool is harmful because created sockets are never closed. --- dbms/src/Common/CurrentThread.cpp | 3 --- dbms/src/Common/ThreadStatus.cpp | 5 +---- dbms/src/Common/ThreadStatus.h | 4 +--- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index a3919108724..b2f165e5469 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -3,7 +3,6 @@ #include "CurrentThread.h" #include #include -#include #include #include #include @@ -24,8 +23,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -SimpleObjectPool task_stats_info_getter_pool; - // Smoker's implementation to avoid thread_local usage: error: undefined symbol: __cxa_thread_atexit #if defined(ARCADIA_ROOT) struct ThreadStatusPtrHolder : ThreadStatusPtr diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index 7a321cdaeb7..0ee09d527ce 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -21,9 +21,6 @@ namespace ErrorCodes } -extern SimpleObjectPool task_stats_info_getter_pool; - - TasksStatsCounters TasksStatsCounters::current() { TasksStatsCounters res; @@ -74,7 +71,7 @@ void ThreadStatus::initPerformanceCounters() if (TaskStatsInfoGetter::checkPermissions()) { if (!taskstats_getter) - taskstats_getter = task_stats_info_getter_pool.getDefault(); + taskstats_getter = std::make_unique(); *last_taskstats = TasksStatsCounters::current(); } diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 822e1931447..3f7a91a54f0 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -2,7 +2,6 @@ #include #include -#include #include @@ -175,8 +174,7 @@ protected: std::unique_ptr last_taskstats; /// Set to non-nullptr only if we have enough capabilities. - /// We use pool because creation and destruction of TaskStatsInfoGetter objects are expensive. 
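/// Rationale from the commit message: constructing a TaskStatsInfoGetter is
/// on the order of 10us, so pooling buys almost nothing, while pooled getters
/// keep their sockets open indefinitely. The member therefore becomes a plain
/// lazily-created owner; a sketch with the template arguments restored (they
/// are stripped in this extraction):
///
///     std::unique_ptr<TaskStatsInfoGetter> taskstats_getter;
///
///     if (!taskstats_getter)
///         taskstats_getter = std::make_unique<TaskStatsInfoGetter>();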
- SimpleObjectPool::Pointer taskstats_getter; + std::unique_ptr taskstats_getter; }; } From 1dab649bf3b5f3b88de4bdd38d25994f0c93bdf4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 9 Jan 2019 21:16:54 +0300 Subject: [PATCH 18/24] Revert "Apply upstream jemalloc patch for potential leak" --- contrib/jemalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc b/contrib/jemalloc index cd2931ad9bb..41b7372eade 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff +Subproject commit 41b7372eadee941b9164751b8d4963f915d3ceae From 64c2c3650c556dae0e449c06322301c07fbd5b87 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 9 Jan 2019 21:32:43 +0300 Subject: [PATCH 19/24] cmake: Dont cache version; macos: fix build; /release_lib.sh move to utils (#4016) --- dbms/CMakeLists.txt | 2 +- dbms/cmake/version.cmake | 20 +++++++++---------- dbms/src/Common/ShellCommand.cpp | 2 +- release | 2 +- .../release/release_lib.sh | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) rename release_lib.sh => utils/release/release_lib.sh (99%) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 40670f391bc..84c4b76d6fb 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -16,7 +16,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h) set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h) include (cmake/version.cmake) -message (STATUS "Will build ${VERSION_FULL}") +message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}") configure_file (src/Common/config.h.in ${CONFIG_COMMON}) configure_file (src/Common/config_version.h.in ${CONFIG_VERSION}) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 61bf2b2a6f9..94a10028518 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54413 CACHE STRING "") # changed manually for tests -set(VERSION_MAJOR 19 CACHE STRING "") -set(VERSION_MINOR 1 CACHE STRING "") -set(VERSION_PATCH 0 CACHE STRING "") -set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067 CACHE STRING "") -set(VERSION_DESCRIBE v19.1.0-testing CACHE STRING "") -set(VERSION_STRING 19.1.0 CACHE STRING "") +set(VERSION_REVISION 54413) +set(VERSION_MAJOR 19) +set(VERSION_MINOR 1) +set(VERSION_PATCH 0) +set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067) +set(VERSION_DESCRIBE v19.1.0-testing) +set(VERSION_STRING 19.1.0) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") @@ -19,8 +19,8 @@ if (VERSION_EXTRA) string(CONCAT VERSION_STRING ${VERSION_STRING} "." 
${VERSION_EXTRA}) endif () -set (VERSION_NAME "${PROJECT_NAME}" CACHE STRING "") -set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}" CACHE STRING "") -set (VERSION_SO "${VERSION_STRING}" CACHE STRING "") +set (VERSION_NAME "${PROJECT_NAME}") +set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") +set (VERSION_SO "${VERSION_STRING}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") diff --git a/dbms/src/Common/ShellCommand.cpp b/dbms/src/Common/ShellCommand.cpp index f9835efe68d..84961292d02 100644 --- a/dbms/src/Common/ShellCommand.cpp +++ b/dbms/src/Common/ShellCommand.cpp @@ -36,7 +36,7 @@ namespace if (0 != pipe2(fds_rw, O_CLOEXEC)) DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE); #else - if (0 != pipe(fds)) + if (0 != pipe(fds_rw)) DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE); if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC)) DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE); diff --git a/release b/release index 23bfd6f2dd6..e3c8842a820 100755 --- a/release +++ b/release @@ -32,7 +32,7 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) cd $CURDIR -source "./release_lib.sh" +source "./utils/release/release_lib.sh" PBUILDER_AUTOUPDATE=${PBUILDER_AUTOUPDATE=4320} diff --git a/release_lib.sh b/utils/release/release_lib.sh similarity index 99% rename from release_lib.sh rename to utils/release/release_lib.sh index ecdc10deefe..45a01e3f745 100644 --- a/release_lib.sh +++ b/utils/release/release_lib.sh @@ -9,7 +9,7 @@ function gen_version_string { } function get_version { - BASEDIR=$(dirname "${BASH_SOURCE[0]}") + BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../ VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'` VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'` VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'` From 1eb0750cfa6049ff528ac5d1273899c7ac4e0dd5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 13:35:17 +0300 Subject: [PATCH 20/24] Get clang-7 from clang repo and rollback to ubuntu 18.04 --- docker/packager/binary/Dockerfile | 4 +++- docker/packager/deb/Dockerfile | 10 ++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index dfad9f9e123..b7ed6e68b7a 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -1,4 +1,6 @@ -FROM ubuntu:18.10 +FROM ubuntu:18.04 + +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 384ab76f625..98cda15a587 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -1,8 +1,10 @@ -FROM ubuntu:18.10 +FROM ubuntu:18.04 -RUN apt-get update -y \ +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list + +RUN apt-get --allow-unauthenticated update -y \ && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --yes --no-install-recommends \ + apt-get --allow-unauthenticated install --yes --no-install-recommends \ bash \ fakeroot \ cmake \ @@ -33,8 +35,8 @@ RUN 
apt-get update -y \ devscripts \ debhelper \ git \ - libc++abi-dev \ libc++-dev \ + libc++abi-dev \ libboost-program-options-dev \ libboost-system-dev \ libboost-filesystem-dev \ From 6401628dc85bd3a1f7aff3b840844c200e13d30d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 14:25:59 +0300 Subject: [PATCH 21/24] Fix clang-7 werrors --- dbms/src/Common/config.h.in | 1 + dbms/src/Interpreters/ExpressionJIT.cpp | 30 ++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 624c87b91b5..09c2eadde29 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -16,6 +16,7 @@ #cmakedefine01 USE_BASE64 #cmakedefine01 USE_HDFS #cmakedefine01 USE_XXHASH +#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 123778c6fe9..9ac95e3a107 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -161,21 +161,21 @@ auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr) // Actually this should work for 7.0.0 but now we have OLDER 7.0.0svn in contrib auto flags = [&](const llvm::orc::SymbolNameSet & symbols) { - llvm::orc::SymbolFlagsMap flags; + llvm::orc::SymbolFlagsMap flags_map; for (const auto & symbol : symbols) { auto resolved = jsr.lookupFlags({*symbol}); if (resolved && resolved->size()) - flags.emplace(symbol, resolved->begin()->second); + flags_map.emplace(symbol, resolved->begin()->second); } - return flags; + return flags_map; }; #endif - auto symbols = [&](std::shared_ptr query, llvm::orc::SymbolNameSet symbols) + auto symbols = [&](std::shared_ptr query, llvm::orc::SymbolNameSet symbols_set) { llvm::orc::SymbolNameSet missing; - for (const auto & symbol : symbols) + for (const auto & symbol : symbols_set) { auto resolved = jsr.lookup({*symbol}); if (resolved && resolved->size()) @@ -275,20 +275,20 @@ struct LLVMContext { if (!module->size()) return 0; - llvm::PassManagerBuilder builder; + llvm::PassManagerBuilder pass_manager_builder; llvm::legacy::PassManager mpm; llvm::legacy::FunctionPassManager fpm(module.get()); - builder.OptLevel = 3; - builder.SLPVectorize = true; - builder.LoopVectorize = true; - builder.RerollLoops = true; - builder.VerifyInput = true; - builder.VerifyOutput = true; - machine->adjustPassManager(builder); + pass_manager_builder.OptLevel = 3; + pass_manager_builder.SLPVectorize = true; + pass_manager_builder.LoopVectorize = true; + pass_manager_builder.RerollLoops = true; + pass_manager_builder.VerifyInput = true; + pass_manager_builder.VerifyOutput = true; + machine->adjustPassManager(pass_manager_builder); fpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); mpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - builder.populateFunctionPassManager(fpm); - builder.populateModulePassManager(mpm); + pass_manager_builder.populateFunctionPassManager(fpm); + pass_manager_builder.populateModulePassManager(mpm); fpm.doInitialization(); for (auto & function : *module) fpm.run(function); From 8ae59da55758fa2e088557bcad2292e5efd168d9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 14:46:20 +0300 Subject: [PATCH 22/24] Fix name hiding --- dbms/src/Functions/IFunction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/IFunction.cpp 
index 6b6186302f7..ac5d1122e4a 100644
--- a/dbms/src/Functions/IFunction.cpp
+++ b/dbms/src/Functions/IFunction.cpp
@@ -512,8 +512,8 @@ static std::optional<DataTypes> removeNullables(const DataTypes & types)
         if (!typeid_cast<const DataTypeNullable *>(type.get()))
             continue;
         DataTypes filtered;
-        for (const auto & type : types)
-            filtered.emplace_back(removeNullable(type));
+        for (const auto & sub_type : types)
+            filtered.emplace_back(removeNullable(sub_type));
         return filtered;
     }
     return {};

From d310d1a5ec383f9c9ba3c0475159f99473d8fb0f Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Thu, 10 Jan 2019 19:51:49 +0300
Subject: [PATCH 23/24] fixed setSkipIndexes

---
 dbms/src/Storages/MergeTree/MergeTreeData.cpp | 54 +++++++++++--------
 dbms/src/Storages/MergeTree/MergeTreeData.h   |  1 +
 dbms/src/Storages/StorageMergeTree.cpp        |  1 +
 .../Storages/StorageReplicatedMergeTree.cpp   |  1 +
 4 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
index 3801b81530d..c2396e02988 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp
@@ -116,6 +116,7 @@ MergeTreeData::MergeTreeData(
     data_parts_by_state_and_info(data_parts_indexes.get())
 {
     setPrimaryKeyAndColumns(order_by_ast_, primary_key_ast_, columns_);
+    setSkipIndexes(indexes_ast_);

     /// NOTE: using the same columns list as is read when performing actual merges.
     merging_params.check(getColumns().getAllPhysical());
@@ -189,8 +190,6 @@ MergeTreeData::MergeTreeData(
         throw Exception(
             "MergeTree data format version on disk doesn't support custom partitioning",
             ErrorCodes::METADATA_MISMATCH);
-
-    setSkipIndexes(indexes_ast_);
 }
@@ -356,27 +355,32 @@ void MergeTreeData::setSkipIndexes(const ASTPtr & indexes_asts, bool only_check)
     {
         return;
     }
+
+    MergeTreeIndexes new_indexes;
+    std::set<String> names;
+    auto index_list = std::dynamic_pointer_cast<ASTExpressionList>(indexes_asts);
+
+    for (const auto &index_ast : index_list->children)
+    {
+        new_indexes.push_back(
+                std::move(MergeTreeIndexFactory::instance().get(
+                        *this,
+                        std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast),
+                        global_context)));
+
+        if (names.find(new_indexes.back()->name) != names.end())
+        {
+            throw Exception(
+                    "Index with name `" + new_indexes.back()->name + "` already exists",
+                    ErrorCodes::LOGICAL_ERROR);
+        }
+        names.insert(new_indexes.back()->name);
+    }
+
     if (!only_check)
     {
-        indexes.clear();
-        std::set<String> names;
-        auto index_list = std::dynamic_pointer_cast<ASTExpressionList>(indexes_asts);
-
-        for (const auto &index_ast : index_list->children)
-        {
-            indexes.push_back(
-                    std::move(MergeTreeIndexFactory::instance().get(
-                            *this,
-                            std::dynamic_pointer_cast<ASTIndexDeclaration>(index_ast),
-                            global_context)));
-            if (names.find(indexes.back()->name) != names.end())
-            {
-                throw Exception(
-                        "Index with name `" + indexes.back()->name + "` already exsists",
-                        ErrorCodes::LOGICAL_ERROR);
-            }
-            names.insert(indexes.back()->name);
-        }
+        skip_indexes_ast = indexes_asts;
+        indexes = std::move(new_indexes);
     }
 }
@@ -1056,6 +1060,13 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
             columns_alter_forbidden.insert(col);
     }

+    for (auto index : indexes)
+    {
+        /// TODO: some special error telling about "drop index"
+        for (const String & col : index->expr->getRequiredColumns())
+            columns_alter_forbidden.insert(col);
+    }
+
     if (sorting_key_expr)
     {
         for (const ExpressionAction & action : sorting_key_expr->getActions())
@@ -1111,6 +1122,7 @@ void MergeTreeData::checkAlter(const AlterCommands & commands)
     }

    setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns, /* only_check = */ true);
+    setSkipIndexes(skip_indexes_ast, /* only_check = */ true);

     /// Check that type conversions are possible.
     ExpressionActionsPtr unused_expression;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h
index e71ad5fa9d6..70ae8b25c67 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeData.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeData.h
@@ -583,6 +583,7 @@ public:

     /// Secondary (data skipping) indexes for MergeTree
     MergeTreeIndexes indexes;
+    ASTPtr skip_indexes_ast;

     /// Names of columns for primary key + secondary sorting columns.
     Names sorting_key_columns;
diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp
index 833b20ab05b..f71a64662a4 100644
--- a/dbms/src/Storages/StorageMergeTree.cpp
+++ b/dbms/src/Storages/StorageMergeTree.cpp
@@ -244,6 +244,7 @@ void StorageMergeTree::alter(

     /// Reinitialize primary key because primary key column types might have changed.
     data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns);
+    data.setSkipIndexes(data.skip_indexes_ast);

     for (auto & transaction : transactions)
         transaction->commit();
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index f60250d1be5..50b133482b2 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -461,6 +461,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column
     /// Even if the primary/sorting keys didn't change we must reinitialize it
     /// because primary key column types might have changed.
     data.setPrimaryKeyAndColumns(new_order_by_ast, new_primary_key_ast, new_columns);
+    data.setSkipIndexes(data.skip_indexes_ast);
 }

From 89b831eaf75c7479bd89876bb0153b9374957a54 Mon Sep 17 00:00:00 2001
From: Nikita Vasilev
Date: Thu, 10 Jan 2019 20:48:04 +0300
Subject: [PATCH 24/24] added indexes meta to zookeeper

---
 .../MergeTree/ReplicatedMergeTreeTableMetadata.cpp | 14 ++++++++++++++
 .../MergeTree/ReplicatedMergeTreeTableMetadata.h   |  1 +
 2 files changed, 15 insertions(+)

diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp
index ae5249d3d16..aaabc6901ae 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp
@@ -44,6 +44,8 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr

     if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
         partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast));
+
+    skip_indexes = formattedAST(data.skip_indexes_ast);
 }

 void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const
@@ -64,6 +66,9 @@ void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const

     if (!sorting_key.empty())
         out << "sorting key: " << sorting_key << "\n";
+
+    if (!skip_indexes.empty())
+        out << "skip indexes: " << skip_indexes << "\n";
 }

 String ReplicatedMergeTreeTableMetadata::toString() const
@@ -93,6 +98,9 @@ void ReplicatedMergeTreeTableMetadata::read(ReadBuffer & in)

     if (checkString("sorting key: ", in))
         in >> sorting_key >> "\n";
+
+    if (checkString("skip indexes: ", in))
+        in >> skip_indexes >> "\n";
 }

 ReplicatedMergeTreeTableMetadata ReplicatedMergeTreeTableMetadata::parse(const String & s)
@@ -175,6 +183,12 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl
                 ErrorCodes::METADATA_MISMATCH);
     }

+    if (skip_indexes != from_zk.skip_indexes)
+        throw Exception("Existing table metadata in ZooKeeper differs in skip indexes."
+            " Stored in ZooKeeper: " + from_zk.skip_indexes +
+            ", local: " + skip_indexes,
+            ErrorCodes::METADATA_MISMATCH);
+
     return diff;
 }
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h
index b063e226348..5fd863046e4 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h
@@ -25,6 +25,7 @@ struct ReplicatedMergeTreeTableMetadata
     MergeTreeDataFormatVersion data_format_version;
     String partition_key;
     String sorting_key;
+    String skip_indexes;

     ReplicatedMergeTreeTableMetadata() = default;
     explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data);
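
Appendix (illustration, not part of the patch series): a minimal, self-contained C++ sketch of the metadata round-trip that PATCH 24 introduces. All names here (TableMetadataSketch, the sample value "idx1 minmax GRANULARITY 1") are hypothetical stand-ins; the real code uses ReplicatedMergeTreeTableMetadata with WriteBuffer/ReadBuffer and a ZooKeeper znode, not iostreams.

// Hypothetical sketch only: mirrors the "skip indexes" write/read logic
// from PATCH 24 with plain iostreams standing in for ClickHouse's buffers.
#include <iostream>
#include <sstream>
#include <string>

struct TableMetadataSketch
{
    std::string sorting_key;
    std::string skip_indexes;

    // Like write(): optional fields are emitted as "key: value\n" lines
    // only when non-empty, so metadata from older servers stays identical.
    void write(std::ostream & out) const
    {
        if (!sorting_key.empty())
            out << "sorting key: " << sorting_key << "\n";
        if (!skip_indexes.empty())
            out << "skip indexes: " << skip_indexes << "\n";
    }

    // Like read(): each optional line is recognized by its prefix, so
    // metadata written without the new field still parses cleanly.
    void read(std::istream & in)
    {
        static const std::string sorting_prefix = "sorting key: ";
        static const std::string skip_prefix = "skip indexes: ";
        std::string line;
        while (std::getline(in, line))
        {
            if (line.compare(0, sorting_prefix.size(), sorting_prefix) == 0)
                sorting_key = line.substr(sorting_prefix.size());
            else if (line.compare(0, skip_prefix.size(), skip_prefix) == 0)
                skip_indexes = line.substr(skip_prefix.size());
        }
    }
};

int main()
{
    // "idx1 minmax GRANULARITY 1" is an invented example index definition.
    TableMetadataSketch local{"tuple()", "idx1 minmax GRANULARITY 1"};

    std::stringstream zk_node; // stands in for the ZooKeeper node contents
    local.write(zk_node);

    TableMetadataSketch from_zk;
    from_zk.read(zk_node);

    // checkAndFindDiff() in the patch throws METADATA_MISMATCH when the
    // local and ZooKeeper copies of this field differ.
    std::cout << (local.skip_indexes == from_zk.skip_indexes ? "match" : "mismatch") << "\n";
}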