From a1cdb0049d34d6bf6b4f0058a2906c5084c94657 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 9 Jan 2019 13:47:22 +0300
Subject: [PATCH 01/56] Add test for user exception check

---
 .../__init__.py                               |  0
 .../configs/remote_servers.xml                | 16 ++++++
 .../configs/user_restrictions.xml             | 38 +++++++++++++
 .../test.py                                   | 54 +++++++++++++++++++
 4 files changed, 108 insertions(+)
 create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/__init__.py
 create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml
 create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml
 create mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py

diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/__init__.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml
new file mode 100644
index 00000000000..3593cbd7f36
--- /dev/null
+++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml
@@ -0,0 +1,16 @@
+<yandex>
+    <remote_servers>
+        <test_cluster>
+            <shard>
+                <replica>
+                    <host>node1</host>
+                    <port>9000</port>
+                </replica>
+                <replica>
+                    <host>node2</host>
+                    <port>9000</port>
+                </replica>
+            </shard>
+        </test_cluster>
+    </remote_servers>
+</yandex>
diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml
new file mode 100644
index 00000000000..bd91f1d495c
--- /dev/null
+++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/user_restrictions.xml
@@ -0,0 +1,38 @@
+<yandex>
+    <profiles>
+        <default>
+            <max_memory_usage>10000000000</max_memory_usage>
+            <use_uncompressed_cache>0</use_uncompressed_cache>
+            <load_balancing>random</load_balancing>
+        </default>
+        <good>
+            <max_memory_usage>10000000000</max_memory_usage>
+            <use_uncompressed_cache>0</use_uncompressed_cache>
+            <load_balancing>random</load_balancing>
+            <max_concurrent_queries_for_user>2</max_concurrent_queries_for_user>
+        </good>
+    </profiles>
+    <users>
+        <default>
+            <password></password>
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+            <profile>default</profile>
+            <quota>default</quota>
+        </default>
+        <good>
+            <password></password>
+            <networks>
+                <ip>::/0</ip>
+            </networks>
+            <profile>good</profile>
+            <quota>default</quota>
+        </good>
+    </users>
+
+    <quotas>
+        <default>
+        </default>
+    </quotas>
+</yandex>
diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py
new file mode 100644
index 00000000000..665f0877586
--- /dev/null
+++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py
@@ -0,0 +1,54 @@
+import time
+
+import pytest
+
+from multiprocessing.dummy import Pool
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance('node1', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml'])
+node2 = cluster.add_instance('node2', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml'])
+
+@pytest.fixture(scope="module")
+def started_cluster():
+    try:
+        cluster.start()
+        for num, node in enumerate([node1, node2]):
+            node.query("create table real_tbl (ID UInt64, Value String) ENGINE = MergeTree() order by tuple()")
+            node.query("insert into real_tbl values(0, '0000'), (1, '1111')")
+            node.query("create table distr_tbl (ID UInt64, Value String) ENGINE Distributed(test_cluster, default, real_tbl)")
+
+        node1.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()")
+        node1.query("insert into nums values(0),(1)")
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def num_getter(num):
+    if num % 2 == 0:
+        return node1
+    else:
+        return node2
+
+@pytest.mark.parametrize("node_getter", [
+    (lambda _: node1),
+    (lambda _:
node2), + (num_getter), +]) +def test_exception_message(started_cluster, node_getter): + assert node1.query("select ID from distr_tbl order by ID") == "0\n1\n" + assert node1.query("select number from nums order by number") == "0\n1\n" + try: + p = Pool(10) + def query(num): + node = node_getter(num) + node.query( + "select sleep(2) from distr_tbl where ID GLOBAL IN (select number from remote('node1', 'default', 'nums'))", + user='good') + + p.map(query, xrange(3)) + except Exception as ex: + assert 'Too many simultaneous queries for user good.' in ex.message + print ex.message From cea23a1486af3eab22b35e7ff6422abe74aadc7d Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 9 Jan 2019 15:16:03 +0300 Subject: [PATCH 02/56] Fix LowCardinality empty array serialization. #3907 --- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 4 ++++ .../00800_low_cardinality_empty_array.reference | 2 ++ .../0_stateless/00800_low_cardinality_empty_array.sql | 7 +++++++ 3 files changed, 13 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index cf38941b743..01928a3db53 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -508,6 +508,10 @@ void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams( size_t max_limit = column.size() - offset; limit = limit ? std::min(limit, max_limit) : max_limit; + /// Do not write anything for empty column. (May happen while writing empty arrays.) + if (limit == 0) + return; + auto sub_column = low_cardinality_column.cutAndCompact(offset, limit); ColumnPtr positions = sub_column->getIndexesPtr(); ColumnPtr keys = sub_column->getDictionary().getNestedColumn(); diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference new file mode 100644 index 00000000000..c71bf50e82f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.reference @@ -0,0 +1,2 @@ +[] +[] diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql new file mode 100644 index 00000000000..0f02f6aa2d5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_empty_array.sql @@ -0,0 +1,7 @@ +drop table if exists test.lc; +create table test.lc (names Array(LowCardinality(String))) engine=MergeTree order by tuple(); +insert into test.lc values ([]); +insert into test.lc select emptyArrayString(); +select * from test.lc; +drop table if exists test.lc; + From b8efafd400d37c0be10706c6485997c51a78386e Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 15:21:04 +0300 Subject: [PATCH 03/56] Fix bug with wrong user restrictions in remote table func --- .../ClusterProxy/executeQuery.cpp | 21 +++++++---- .../Interpreters/ClusterProxy/executeQuery.h | 4 +++ .../Storages/getStructureOfRemoteTable.cpp | 6 +++- .../configs/remote_servers.xml | 16 --------- .../test.py | 35 ++++++++----------- 5 files changed, 37 insertions(+), 45 deletions(-) delete mode 100644 dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp 
b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
index 27b7d8338af..4b9aa713f07 100644
--- a/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -14,14 +14,8 @@ namespace DB
 namespace ClusterProxy
 {

-BlockInputStreams executeQuery(
-    IStreamFactory & stream_factory, const ClusterPtr & cluster,
-    const ASTPtr & query_ast, const Context & context, const Settings & settings)
+Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings)
 {
-    BlockInputStreams res;
-
-    const std::string query = queryToString(query_ast);
-
     Settings new_settings = settings;
     new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);

@@ -39,6 +33,19 @@ BlockInputStreams executeQuery(
     Context new_context(context);
     new_context.setSettings(new_settings);

+    return new_context;
+}
+
+BlockInputStreams executeQuery(
+    IStreamFactory & stream_factory, const ClusterPtr & cluster,
+    const ASTPtr & query_ast, const Context & context, const Settings & settings)
+{
+    BlockInputStreams res;
+
+    const std::string query = queryToString(query_ast);
+
+    Context new_context = removeUserRestrictionsFromSettings(context, settings);
+
     ThrottlerPtr user_level_throttler;
     if (auto process_list_element = context.getProcessListElement())
         user_level_throttler = process_list_element->getUserNetworkThrottler();
diff --git a/dbms/src/Interpreters/ClusterProxy/executeQuery.h b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
index b12fc2b4646..5c07c287954 100644
--- a/dbms/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/dbms/src/Interpreters/ClusterProxy/executeQuery.h
@@ -16,6 +16,10 @@ namespace ClusterProxy

 class IStreamFactory;

+/// Removes per-user restrictions (max_concurrent_queries_for_user, max_memory_usage_for_user, etc.)
+/// from the settings and creates a new context with those limits lifted.
+Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings);
+
 /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
 /// `stream_factory` object encapsulates the logic of creating streams for a different type of query
 /// (currently SELECT, DESCRIBE).
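/// Usage sketch (illustrative only; it mirrors the call site added to
/// getStructureOfRemoteTable.cpp below, with sample_block standing in for
/// the expected result header):
///
///     Context new_context = removeUserRestrictionsFromSettings(context, context.getSettingsRef());
///     auto input = std::make_shared<RemoteBlockInputStream>(shard_info.pool, query, sample_block, new_context);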
diff --git a/dbms/src/Storages/getStructureOfRemoteTable.cpp b/dbms/src/Storages/getStructureOfRemoteTable.cpp
index 174ec49a4f1..1e5b37d62d0 100644
--- a/dbms/src/Storages/getStructureOfRemoteTable.cpp
+++ b/dbms/src/Storages/getStructureOfRemoteTable.cpp
@@ -1,6 +1,7 @@
 #include "getStructureOfRemoteTable.h"
 #include
 #include
+#include
 #include
 #include
 #include
@@ -54,7 +55,10 @@ ColumnsDescription getStructureOfRemoteTable(

     ColumnsDescription res;

-    auto input = std::make_shared(shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), context);
+
+    auto new_context = ClusterProxy::removeUserRestrictionsFromSettings(context, context.getSettingsRef());
+    /// Execute the remote query without restrictions (it is not a real user query, but part of the implementation).
+    auto input = std::make_shared(shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), new_context);
     input->setPoolMode(PoolMode::GET_ONE);
     if (!table_func_ptr)
         input->setMainTable(QualifiedTableName{database, table});
diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml
deleted file mode 100644
index 3593cbd7f36..00000000000
--- a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/configs/remote_servers.xml
+++ /dev/null
@@ -1,16 +0,0 @@
-<yandex>
-    <remote_servers>
-        <test_cluster>
-            <shard>
-                <replica>
-                    <host>node1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>node2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster>
-    </remote_servers>
-</yandex>
diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py
index 665f0877586..26a42637063 100644
--- a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py
+++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py
@@ -22,33 +22,26 @@ def started_cluster():
         node1.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()")
         node1.query("insert into nums values(0),(1)")

+        node2.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()")
+        node2.query("insert into nums values(0),(1)")
+
         yield cluster
     finally:
         cluster.shutdown()

-def num_getter(num):
-    if num % 2 == 0:
-        return node1
-    else:
-        return node2
-
-@pytest.mark.parametrize("node_getter", [
-    (lambda _: node1),
-    (lambda _: node2),
-    (num_getter),
-])
-def test_exception_message(started_cluster, node_getter):
+def test_exception_message(started_cluster):
-    assert node1.query("select ID from distr_tbl order by ID") == "0\n1\n"
     assert node1.query("select number from nums order by number") == "0\n1\n"
-    try:
-        p = Pool(10)
-        def query(num):
-            node = node_getter(num)
-            node.query(
-                "select sleep(2) from distr_tbl where ID GLOBAL IN (select number from remote('node1', 'default', 'nums'))",
-                user='good')

-        p.map(query, xrange(3))
+    def node_busy(_):
+        for i in xrange(10):
+            node1.query("select sleep(2)", user='default')
+
+    busy_pool = Pool(3)
+    busy_pool.map_async(node_busy, xrange(3))
+    time.sleep(1) # wait a little until polling start
+    try:
+        assert node2.query("select number from remote('node1', 'default', 'nums')", user='good') == "0\n1\n"
     except Exception as ex:
-        assert 'Too many simultaneous queries for user good.'
in ex.message print ex.message + assert False, "Exception thrown while max_concurrent_queries_for_user is not exceeded" From f00b7ba08a3699fd299440c7b7a91adeb57d4d0d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 15:23:41 +0300 Subject: [PATCH 04/56] Simplify test --- .../test.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py index 26a42637063..4b7cc87c15a 100644 --- a/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py +++ b/dbms/tests/integration/test_concurrent_queries_for_user_restriction/test.py @@ -7,30 +7,21 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml']) -node2 = cluster.add_instance('node2', user_configs=['configs/user_restrictions.xml'], main_configs=['configs/remote_servers.xml']) +node1 = cluster.add_instance('node1', user_configs=['configs/user_restrictions.xml']) +node2 = cluster.add_instance('node2', user_configs=['configs/user_restrictions.xml']) @pytest.fixture(scope="module") def started_cluster(): try: cluster.start() - for num, node in enumerate([node1, node2]): - node.query("create table real_tbl (ID UInt64, Value String) ENGINE = MergeTree() order by tuple()") - node.query("insert into real_tbl values(0, '0000'), (1, '1111')") - node.query("create table distr_tbl (ID UInt64, Value String) ENGINE Distributed(test_cluster, default, real_tbl)") - node1.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") node1.query("insert into nums values(0),(1)") - node2.query("create table nums (number UInt64) ENGINE = MergeTree() order by tuple()") - node2.query("insert into nums values(0),(1)") - yield cluster finally: cluster.shutdown() def test_exception_message(started_cluster): - assert node1.query("select ID from distr_tbl order by ID") == "0\n1\n" assert node1.query("select number from nums order by number") == "0\n1\n" def node_busy(_): @@ -39,7 +30,7 @@ def test_exception_message(started_cluster): busy_pool = Pool(3) busy_pool.map_async(node_busy, xrange(3)) - time.sleep(1) # wait a little until polling start + time.sleep(1) # wait a little until polling starts try: assert node2.query("select number from remote('node1', 'default', 'nums')", user='good') == "0\n1\n" except Exception as ex: From 5ab362a303725368ee1f9db26179b37f39f62fad Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 9 Jan 2019 17:47:51 +0300 Subject: [PATCH 05/56] Fix distinct by single LowCardinality numeric column. 
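The set built for DISTINCT could take the fixed-width single-numeric-key
path for a lone LowCardinality numeric column, even though such a column
stores dictionary indices rather than plain contiguous values. The guard
below is the core of the change to SetVariants::chooseMethod() (a
simplified sketch; the full diff follows):

    /// Simplified sketch of the new condition in SetVariants::chooseMethod():
    if (keys_size == 1 && nested_key_columns[0]->isNumeric()
        && !nested_key_columns[0]->lowCardinality())
    {
        /// only then is a fixed-width keyN method chosen via sizeOfValueIfFixed()
    }

ColumnLowCardinality::isFixedAndContiguous() now returns false for the same
reason: the column's own data is indices into a dictionary, not the values
themselves.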
--- dbms/src/Columns/ColumnLowCardinality.h | 2 +- dbms/src/Interpreters/SetVariants.cpp | 2 +- ...low_cardinality_distinct_numeric.reference | 123 ++++++++++++++++++ ...00800_low_cardinality_distinct_numeric.sql | 7 + 4 files changed, 132 insertions(+), 2 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference create mode 100644 dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql diff --git a/dbms/src/Columns/ColumnLowCardinality.h b/dbms/src/Columns/ColumnLowCardinality.h index bfca6e41123..34a3db8589e 100644 --- a/dbms/src/Columns/ColumnLowCardinality.h +++ b/dbms/src/Columns/ColumnLowCardinality.h @@ -133,7 +133,7 @@ public: } bool valuesHaveFixedSize() const override { return getDictionary().valuesHaveFixedSize(); } - bool isFixedAndContiguous() const override { return getDictionary().isFixedAndContiguous(); } + bool isFixedAndContiguous() const override { return false; } size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } bool isNumeric() const override { return getDictionary().isNumeric(); } bool lowCardinality() const override { return true; } diff --git a/dbms/src/Interpreters/SetVariants.cpp b/dbms/src/Interpreters/SetVariants.cpp index f0d9bbb2af8..6f457ed0bed 100644 --- a/dbms/src/Interpreters/SetVariants.cpp +++ b/dbms/src/Interpreters/SetVariants.cpp @@ -137,7 +137,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose } /// If there is one numeric key that fits into 64 bits - if (keys_size == 1 && nested_key_columns[0]->isNumeric()) + if (keys_size == 1 && nested_key_columns[0]->isNumeric() && !nested_key_columns[0]->lowCardinality()) { size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed(); if (size_of_field == 1) diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference new file mode 100644 index 00000000000..a39df1e16c0 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.reference @@ -0,0 +1,123 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql new file mode 100644 index 00000000000..d3ba9138a8f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_distinct_numeric.sql @@ -0,0 +1,7 @@ +set allow_experimental_low_cardinality_type = 1; +drop table if exists test.lc; +create table test.lc (val LowCardinality(UInt64)) engine = MergeTree order by val; +insert into test.lc select number % 123 from system.numbers limit 100000; +select distinct(val) from test.lc order by val; +drop table if exists test.lc; + From 19f465295940383a314d7957f20826880ed495ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 18:44:20 +0300 Subject: [PATCH 06/56] Fixed bugs found by PVS-Studio --- dbms/programs/copier/ClusterCopier.cpp | 62 +++++++++---------- 
dbms/programs/obfuscator/Obfuscator.cpp | 4 +- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 4 +- dbms/src/Compression/CompressionCodecZSTD.cpp | 2 +- .../MergeSortingBlockInputStream.cpp | 2 +- dbms/src/Dictionaries/Embedded/RegionsNames.h | 2 +- dbms/src/Functions/hasColumnInTable.cpp | 2 +- .../Interpreters/InterpreterInsertQuery.cpp | 1 - .../Interpreters/InterpreterSystemQuery.cpp | 3 +- .../src/Interpreters/MutationsInterpreter.cpp | 3 +- .../PredicateExpressionsOptimizer.cpp | 8 +++ .../PredicateExpressionsOptimizer.h | 6 -- dbms/src/Interpreters/tests/hash_map.cpp | 29 +++++---- dbms/src/Storages/Kafka/StorageKafka.cpp | 2 +- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../ReplicatedMergeTreeCleanupThread.cpp | 5 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- dbms/src/Storages/StorageMerge.cpp | 3 +- .../TableFunctions/TableFunctionNumbers.cpp | 2 +- libs/libcommon/include/common/Types.h | 21 ------- libs/libcommon/src/tests/CMakeLists.txt | 2 +- utils/compressor/mutator.cpp | 4 +- 22 files changed, 77 insertions(+), 94 deletions(-) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 91ccdc88275..203a7f0cebb 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -243,7 +243,7 @@ struct ClusterPartition UInt64 rows_copied = 0; UInt64 blocks_copied = 0; - size_t total_tries = 0; + UInt64 total_tries = 0; }; @@ -340,7 +340,7 @@ struct TaskCluster String default_local_database; /// Limits number of simultaneous workers - size_t max_workers = 0; + UInt64 max_workers = 0; /// Base settings for pull and push Settings settings_common; @@ -773,11 +773,11 @@ public: } template - decltype(auto) retry(T && func, size_t max_tries = 100) + decltype(auto) retry(T && func, UInt64 max_tries = 100) { std::exception_ptr exception; - for (size_t try_number = 1; try_number <= max_tries; ++try_number) + for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) { try { @@ -880,7 +880,7 @@ public: } /// Compute set of partitions, assume set of partitions aren't changed during the processing - void discoverTablePartitions(TaskTable & task_table, size_t num_threads = 0) + void discoverTablePartitions(TaskTable & task_table, UInt64 num_threads = 0) { /// Fetch partitions list from a shard { @@ -985,7 +985,7 @@ public: /// Retry table processing bool table_is_done = false; - for (size_t num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries) + for (UInt64 num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries) { if (tryProcessTable(task_table)) { @@ -1044,7 +1044,7 @@ protected: String workers_path = getWorkersPath(); String current_worker_path = getCurrentWorkerNodePath(); - size_t num_bad_version_errors = 0; + UInt64 num_bad_version_errors = 0; while (true) { @@ -1055,7 +1055,7 @@ protected: auto version = stat.version; zookeeper->get(workers_path, &stat); - if (static_cast(stat.numChildren) >= task_cluster->max_workers) + if (static_cast(stat.numChildren) >= task_cluster->max_workers) { LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")" << ". 
Postpone processing " << description); @@ -1163,7 +1163,7 @@ protected: } // If all task is finished and zxid is not changed then partition could not become dirty again - for (size_t shard_num = 0; shard_num < status_paths.size(); ++shard_num) + for (UInt64 shard_num = 0; shard_num < status_paths.size(); ++shard_num) { if (zxid1[shard_num] != zxid2[shard_num]) { @@ -1280,7 +1280,7 @@ protected: LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query); /// Limit number of max executing replicas to 1 - size_t num_shards = executeQueryOnCluster(cluster_push, query, nullptr, &settings_push, PoolMode::GET_ONE, 1); + UInt64 num_shards = executeQueryOnCluster(cluster_push, query, nullptr, &settings_push, PoolMode::GET_ONE, 1); if (num_shards < cluster_push->getShardCount()) { @@ -1299,8 +1299,8 @@ protected: } - static constexpr size_t max_table_tries = 1000; - static constexpr size_t max_shard_partition_tries = 600; + static constexpr UInt64 max_table_tries = 1000; + static constexpr UInt64 max_shard_partition_tries = 600; bool tryProcessTable(TaskTable & task_table) { @@ -1317,7 +1317,7 @@ protected: Stopwatch watch; TasksShard expected_shards; - size_t num_failed_shards = 0; + UInt64 num_failed_shards = 0; ++cluster_partition.total_tries; @@ -1368,7 +1368,7 @@ protected: bool is_unprioritized_task = !previous_shard_is_instantly_finished && shard->priority.is_remote; PartitionTaskStatus task_status = PartitionTaskStatus::Error; bool was_error = false; - for (size_t try_num = 0; try_num < max_shard_partition_tries; ++try_num) + for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num) { task_status = tryProcessPartitionTask(partition, is_unprioritized_task); @@ -1434,8 +1434,8 @@ protected: } } - size_t required_partitions = task_table.cluster_partitions.size(); - size_t finished_partitions = task_table.finished_cluster_partitions.size(); + UInt64 required_partitions = task_table.cluster_partitions.size(); + UInt64 finished_partitions = task_table.finished_cluster_partitions.size(); bool table_is_done = finished_partitions >= required_partitions; if (!table_is_done) @@ -1645,7 +1645,7 @@ protected: String query = queryToString(create_query_push_ast); LOG_DEBUG(log, "Create destination tables. 
Query: " << query); - size_t shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, + UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query, create_query_push_ast, &task_cluster->settings_push, PoolMode::GET_MANY); LOG_DEBUG(log, "Destination tables " << getDatabaseDotTable(task_table.table_push) << " have been created on " << shards << " shards of " << task_table.cluster_push->getShardCount()); @@ -1699,7 +1699,7 @@ protected: std::future future_is_dirty_checker; Stopwatch watch(CLOCK_MONOTONIC_COARSE); - constexpr size_t check_period_milliseconds = 500; + constexpr UInt64 check_period_milliseconds = 500; /// Will asynchronously check that ZooKeeper connection and is_dirty flag appearing while copy data auto cancel_check = [&] () @@ -1917,16 +1917,16 @@ protected: /** Executes simple query (without output streams, for example DDL queries) on each shard of the cluster * Returns number of shards for which at least one replica executed query successfully */ - size_t executeQueryOnCluster( + UInt64 executeQueryOnCluster( const ClusterPtr & cluster, const String & query, const ASTPtr & query_ast_ = nullptr, const Settings * settings = nullptr, PoolMode pool_mode = PoolMode::GET_ALL, - size_t max_successful_executions_per_shard = 0) const + UInt64 max_successful_executions_per_shard = 0) const { auto num_shards = cluster->getShardsInfo().size(); - std::vector per_shard_num_successful_replicas(num_shards, 0); + std::vector per_shard_num_successful_replicas(num_shards, 0); ASTPtr query_ast; if (query_ast_ == nullptr) @@ -1939,10 +1939,10 @@ protected: /// We need to execute query on one replica at least - auto do_for_shard = [&] (size_t shard_index) + auto do_for_shard = [&] (UInt64 shard_index) { const Cluster::ShardInfo & shard = cluster->getShardsInfo().at(shard_index); - size_t & num_successful_executions = per_shard_num_successful_replicas.at(shard_index); + UInt64 & num_successful_executions = per_shard_num_successful_replicas.at(shard_index); num_successful_executions = 0; auto increment_and_check_exit = [&] () @@ -1951,12 +1951,12 @@ protected: return max_successful_executions_per_shard && num_successful_executions >= max_successful_executions_per_shard; }; - size_t num_replicas = cluster->getShardsAddresses().at(shard_index).size(); - size_t num_local_replicas = shard.getLocalNodeCount(); - size_t num_remote_replicas = num_replicas - num_local_replicas; + UInt64 num_replicas = cluster->getShardsAddresses().at(shard_index).size(); + UInt64 num_local_replicas = shard.getLocalNodeCount(); + UInt64 num_remote_replicas = num_replicas - num_local_replicas; /// In that case we don't have local replicas, but do it just in case - for (size_t i = 0; i < num_local_replicas; ++i) + for (UInt64 i = 0; i < num_local_replicas; ++i) { auto interpreter = InterpreterFactory::get(query_ast, context); interpreter->execute(); @@ -1997,16 +1997,16 @@ protected: }; { - ThreadPool thread_pool(std::min(num_shards, getNumberOfPhysicalCPUCores())); + ThreadPool thread_pool(std::min(num_shards, UInt64(getNumberOfPhysicalCPUCores()))); - for (size_t shard_index = 0; shard_index < num_shards; ++shard_index) + for (UInt64 shard_index = 0; shard_index < num_shards; ++shard_index) thread_pool.schedule([=] { do_for_shard(shard_index); }); thread_pool.wait(); } - size_t successful_shards = 0; - for (size_t num_replicas : per_shard_num_successful_replicas) + UInt64 successful_shards = 0; + for (UInt64 num_replicas : 
per_shard_num_successful_replicas) successful_shards += (num_replicas > 0); return successful_shards; diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index 0d9946813b9..6edb0de82b3 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -123,7 +123,7 @@ UInt64 hash(Ts... xs) UInt64 maskBits(UInt64 x, size_t num_bits) { - return x & ((1 << num_bits) - 1); + return x & ((1ULL << num_bits) - 1); } @@ -149,7 +149,7 @@ UInt64 feistelNetwork(UInt64 x, size_t num_bits, UInt64 seed, size_t num_rounds UInt64 bits = maskBits(x, num_bits); for (size_t i = 0; i < num_rounds; ++i) bits = feistelRound(bits, num_bits, seed, i); - return (x & ~((1 << num_bits) - 1)) ^ bits; + return (x & ~((1ULL << num_bits) - 1)) ^ bits; } diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 47aa2b91b17..9626a54aa20 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1039,8 +1039,8 @@ void ZooKeeper::sendThread() { /// Wait for the next request in queue. No more than operation timeout. No more than until next heartbeat time. UInt64 max_wait = std::min( - std::chrono::duration_cast(next_heartbeat_time - now).count(), - operation_timeout.totalMilliseconds()); + UInt64(std::chrono::duration_cast(next_heartbeat_time - now).count()), + UInt64(operation_timeout.totalMilliseconds())); RequestInfo info; if (requests_queue.tryPop(info, max_wait)) diff --git a/dbms/src/Compression/CompressionCodecZSTD.cpp b/dbms/src/Compression/CompressionCodecZSTD.cpp index d3f96cc7e06..8c79b4439ac 100644 --- a/dbms/src/Compression/CompressionCodecZSTD.cpp +++ b/dbms/src/Compression/CompressionCodecZSTD.cpp @@ -63,7 +63,7 @@ CompressionCodecZSTD::CompressionCodecZSTD(int level_) void registerCodecZSTD(CompressionCodecFactory & factory) { - UInt8 method_code = static_cast(CompressionMethodByte::ZSTD); + UInt8 method_code = UInt8(CompressionMethodByte::ZSTD); factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr { int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL; diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 0dfd07fc6b6..12ad34b6433 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -78,7 +78,7 @@ Block MergeSortingBlockInputStream::readImpl() if (max_bytes_before_external_sort && sum_bytes_in_blocks > max_bytes_before_external_sort) { Poco::File(tmp_path).createDirectories(); - temporary_files.emplace_back(new Poco::TemporaryFile(tmp_path)); + temporary_files.emplace_back(std::make_unique(tmp_path)); const std::string & path = temporary_files.back()->path(); WriteBufferFromFile file_buf(path); CompressedWriteBuffer compressed_buf(file_buf); diff --git a/dbms/src/Dictionaries/Embedded/RegionsNames.h b/dbms/src/Dictionaries/Embedded/RegionsNames.h index 074a41162f1..7acb23d001e 100644 --- a/dbms/src/Dictionaries/Embedded/RegionsNames.h +++ b/dbms/src/Dictionaries/Embedded/RegionsNames.h @@ -73,7 +73,7 @@ public: { size_t language_id = static_cast(language); - if (region_id > names_refs[language_id].size()) + if (region_id >= names_refs[language_id].size()) return StringRef("", 0); StringRef ref = names_refs[language_id][region_id]; diff --git a/dbms/src/Functions/hasColumnInTable.cpp b/dbms/src/Functions/hasColumnInTable.cpp index 
1039cd1b70b..9c8017497e3 100644 --- a/dbms/src/Functions/hasColumnInTable.cpp +++ b/dbms/src/Functions/hasColumnInTable.cpp @@ -132,7 +132,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & has_column = remote_columns.hasPhysical(column_name); } - block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, has_column); + block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, Field(has_column)); } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index e9d4e3f1440..1af0210bc27 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -99,7 +99,6 @@ BlockIO InterpreterInsertQuery::execute() out = std::make_shared(query.database, query.table, table, context, query_ptr, query.no_destination); - /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote())) diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp index feb351180c7..fc472ad8a9e 100644 --- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace DB @@ -289,7 +290,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) if (replica_names.empty()) return; - ThreadPool pool(std::min(getNumberOfPhysicalCPUCores(), replica_names.size())); + ThreadPool pool(std::min(size_t(getNumberOfPhysicalCPUCores()), replica_names.size())); for (auto & table : replica_names) pool.schedule([&] () { tryRestartReplica(table.first, table.second, system_context); }); pool.wait(); diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index d59fc811338..28578ac19dd 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -203,10 +203,9 @@ void MutationsInterpreter::prepare(bool dry_run) } } } - } - if (!updated_columns.empty()) validateUpdateColumns(storage, updated_columns, column_to_affected_materialized); + } /// First, break a sequence of commands into stages. 
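    /// (Each stage bundles the filters and column updates that a single
    /// expressions chain can evaluate together; stage.expressions_chain is
    /// filled per stage further below.)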
stages.emplace_back(context); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index af84eac7f91..408b827adae 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -14,6 +14,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_ELEMENT_IN_AST; +} + static constexpr auto and_function_name = "and"; PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( @@ -400,6 +406,8 @@ ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_que DatabaseAndTableWithAlias database_and_table_name(*database_and_table_ast); storage = context.getTable(database_and_table_name.database, database_and_table_name.table); } + else + throw Exception("Logical error: unexpected table expression", ErrorCodes::LOGICAL_ERROR); const auto block = storage->getSampleBlock(); for (size_t idx = 0; idx < block.columns(); idx++) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index e999489475c..65148e0682a 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -14,12 +14,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; -} - using PredicateExpressions = std::vector; using ProjectionWithAlias = std::pair; using ProjectionsWithAliases = std::vector; diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp index 6ee31d0eac1..a3e1cad8d12 100644 --- a/dbms/src/Interpreters/tests/hash_map.cpp +++ b/dbms/src/Interpreters/tests/hash_map.cpp @@ -107,13 +107,13 @@ int main(int argc, char ** argv) AggregateFunctionPtr func_avg = factory.get("avg", data_types_uint64); AggregateFunctionPtr func_uniq = factory.get("uniq", data_types_uint64); - #define INIT \ - { \ - value.resize(3); \ - \ - value[0] = func_count.get();\ - value[1] = func_avg.get(); \ - value[2] = func_uniq.get(); \ + #define INIT \ + { \ + value.resize(3); \ + \ + value[0] = func_count.get(); \ + value[1] = func_avg.get(); \ + value[2] = func_uniq.get(); \ } INIT @@ -162,7 +162,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(&it->second) Value(std::move(value)); + new(&it->second) Value; + std::swap(it->second, value); INIT } } @@ -192,7 +193,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(&it->second) Value(std::move(value)); + new(&it->second) Value; + std::swap(it->second, value); INIT } } @@ -223,7 +225,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(&it->second) Value(std::move(value)); + new(&it->second) Value; + std::swap(it->second, value); INIT } } @@ -248,7 +251,7 @@ int main(int argc, char ** argv) std::unordered_map>::iterator it; for (size_t i = 0; i < n; ++i) { - it = map.insert(std::make_pair(data[i], std::move(value))).first; + it = map.insert(std::make_pair(data[i], value)).first; INIT } @@ -269,7 +272,7 @@ int main(int argc, char ** argv) map.set_empty_key(-1ULL); for (size_t i = 0; i < n; ++i) { - it = map.insert(std::make_pair(data[i], std::move(value))).first; + it = map.insert(std::make_pair(data[i], value)).first; INIT } @@ -289,7 +292,7 @@ int main(int argc, char ** argv) 
GOOGLE_NAMESPACE::sparse_hash_map>::iterator it; for (size_t i = 0; i < n; ++i) { - map.insert(std::make_pair(data[i], std::move(value))); + map.insert(std::make_pair(data[i], value)); INIT } diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index a3dd993e8c6..e6ccf544ba1 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -304,7 +304,7 @@ BlockInputStreams StorageKafka::read( if (num_created_consumers == 0) return BlockInputStreams(); - const size_t stream_count = std::min(num_streams, num_created_consumers); + const size_t stream_count = std::min(size_t(num_streams), num_created_consumers); BlockInputStreams streams; streams.reserve(stream_count); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 119b0861fbc..bdcd28c562b 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -2269,7 +2269,7 @@ MergeTreeData::DataPartsVector MergeTreeData::getDataPartsVector(const DataPartS for (auto state : affordable_states) { - buf = std::move(res); + std::swap(buf, res); res.clear(); auto range = getDataPartsStateRange(state); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index b6c2bbd96ee..2f1ee2a2943 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -86,7 +86,6 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() /// We will keep logs after and including this threshold. UInt64 min_saved_log_pointer = std::numeric_limits::max(); - UInt64 min_log_pointer_lost_candidate = std::numeric_limits::max(); Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/log"); @@ -118,7 +117,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/host", &host_stat); String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer"); - UInt32 log_pointer = 0; + UInt64 log_pointer = 0; if (!pointer.empty()) log_pointer = parse(pointer); @@ -190,7 +189,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() for (const String & replica : recovering_replicas) { String pointer = zookeeper->get(storage.zookeeper_path + "/replicas/" + replica + "/log_pointer"); - UInt32 log_pointer = 0; + UInt64 log_pointer = 0; if (!pointer.empty()) log_pointer = parse(pointer); min_saved_log_pointer = std::min(min_saved_log_pointer, log_pointer); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d6275b94a0c..fcdce191169 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -648,7 +648,7 @@ ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsFo /// Let's find the action to merge this part with others. Let's remember others. 
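    /// merge_entry below now starts at queue.end(), so the "nothing found"
    /// case is distinguishable without ever reading an uninitialized
    /// iterator (one of the PVS-Studio findings fixed by this patch).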
StringSet parts_for_merge; - Queue::iterator merge_entry; + Queue::iterator merge_entry = queue.end(); for (Queue::iterator it = queue.begin(); it != queue.end(); ++it) { if ((*it)->type == LogEntry::MERGE_PARTS || (*it)->type == LogEntry::MUTATE_PART) diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 193ca0ebdbb..4aa8791708b 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -219,7 +220,7 @@ BlockInputStreams StorageMerge::read( size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count); size_t current_streams = std::min(current_need_streams, remaining_streams); remaining_streams -= current_streams; - current_streams = std::max(1, current_streams); + current_streams = std::max(size_t(1), current_streams); StoragePtr storage = it->first; TableStructureReadLockPtr struct_lock = it->second; diff --git a/dbms/src/TableFunctions/TableFunctionNumbers.cpp b/dbms/src/TableFunctions/TableFunctionNumbers.cpp index 1970a757b2d..8226542d9ee 100644 --- a/dbms/src/TableFunctions/TableFunctionNumbers.cpp +++ b/dbms/src/TableFunctions/TableFunctionNumbers.cpp @@ -34,7 +34,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_function, const res->startup(); return res; } - throw new Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception("Table function 'numbers' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } void registerTableFunctionNumbers(TableFunctionFactory & factory) diff --git a/libs/libcommon/include/common/Types.h b/libs/libcommon/include/common/Types.h index a6bfcc6ae31..d2fdb0a8343 100644 --- a/libs/libcommon/include/common/Types.h +++ b/libs/libcommon/include/common/Types.h @@ -12,27 +12,6 @@ using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; - -/** This is not the best way to overcome an issue of different definitions - * of uint64_t and size_t on Linux and Mac OS X (both 64 bit). - * - * Note that on both platforms, long and long long are 64 bit types. - * But they are always different types (with the same physical representation). - */ -namespace std -{ - inline UInt64 max(unsigned long x, unsigned long long y) { return x > y ? x : y; } - inline UInt64 max(unsigned long long x, unsigned long y) { return x > y ? x : y; } - inline UInt64 min(unsigned long x, unsigned long long y) { return x < y ? x : y; } - inline UInt64 min(unsigned long long x, unsigned long y) { return x < y ? x : y; } - - inline Int64 max(long x, long long y) { return x > y ? x : y; } - inline Int64 max(long long x, long y) { return x > y ? x : y; } - inline Int64 min(long x, long long y) { return x < y ? x : y; } - inline Int64 min(long long x, long y) { return x < y ? x : y; } -} - - /// Workaround for the issue, that KDevelop doesn't see time_t and size_t types (for syntax highlight). 
#ifdef IN_KDEVELOP_PARSER using time_t = Int64; diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index 86f15cd7a53..ed19600c870 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries (local_date_time_comparison common) add_check(multi_version) add_check(local_date_time_comparison) -add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp) +add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp gtest_max.cpp) target_link_libraries (unit_tests_libcommon common ${GTEST_MAIN_LIBRARIES}) add_check(unit_tests_libcommon) diff --git a/utils/compressor/mutator.cpp b/utils/compressor/mutator.cpp index c8cca3e6ecf..65125d073d0 100644 --- a/utils/compressor/mutator.cpp +++ b/utils/compressor/mutator.cpp @@ -106,7 +106,7 @@ static void mutate(pcg64 & generator, void * src, size_t length) && isAlphaASCII(pos[2])) { auto res = rand(generator, 0, 3); - if (res == 2) + if (res == 2) { std::swap(pos[0], pos[1]); } @@ -118,7 +118,7 @@ static void mutate(pcg64 & generator, void * src, size_t length) else if (pos + 5 <= end && pos[0] >= 0xC0 && pos[0] <= 0xDF && pos[1] >= 0x80 && pos[1] <= 0xBF && pos[2] >= 0x20 && pos[2] < 0x80 && !isAlphaASCII(pos[2]) - && pos[3] >= 0xC0 && pos[0] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF) + && pos[3] >= 0xC0 && pos[3] <= 0xDF && pos[4] >= 0x80 && pos[4] <= 0xBF) { auto res = rand(generator, 0, 3); if (res == 2) From 43bd57eaf10f541db67543af9476c385ceab81bc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 18:47:25 +0300 Subject: [PATCH 07/56] Addition to prev. revision --- libs/libcommon/src/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index ed19600c870..86f15cd7a53 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries (local_date_time_comparison common) add_check(multi_version) add_check(local_date_time_comparison) -add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp gtest_max.cpp) +add_executable (unit_tests_libcommon gtest_json_test.cpp gtest_strong_typedef.cpp gtest_find_symbols.cpp) target_link_libraries (unit_tests_libcommon common ${GTEST_MAIN_LIBRARIES}) add_check(unit_tests_libcommon) From 95a9b8b3f73596f0adbc6d4f530592fbc45e7fcc Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 19:04:39 +0300 Subject: [PATCH 08/56] CLICKHOUSE-4245: Turn on query_log in stateless and stress tests --- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/log_queries.xml | 7 +++++++ docker/test/stress/Dockerfile | 3 ++- docker/test/stress/log_queries.xml | 7 +++++++ 4 files changed, 17 insertions(+), 1 deletion(-) create mode 100644 docker/test/stateless/log_queries.xml create mode 100644 docker/test/stress/log_queries.xml diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 4bdad6aa02c..bc81c298553 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,6 +25,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY zookeeper.xml /etc/clickhouse-server/config.d/zookeeper.xml COPY listen.xml /etc/clickhouse-server/config.d/listen.xml +COPY log_queries.xml 
/etc/clickhouse-server/users.d/log_queries.xml

 CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
     dpkg -i package_folder/clickhouse-server_*.deb; \
diff --git a/docker/test/stateless/log_queries.xml b/docker/test/stateless/log_queries.xml
new file mode 100644
index 00000000000..25261072ade
--- /dev/null
+++ b/docker/test/stateless/log_queries.xml
@@ -0,0 +1,7 @@
+<yandex>
+    <profiles>
+        <default>
+            <log_queries>1</log_queries>
+        </default>
+    </profiles>
+</yandex>
diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile
index 7987e042273..80101688118 100644
--- a/docker/test/stress/Dockerfile
+++ b/docker/test/stress/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:18.04
+FROM ubuntu:18.10

 RUN apt-get update -y \
     && env DEBIAN_FRONTEND=noninteractive \
@@ -20,6 +20,7 @@ RUN apt-get update -y \
             telnet

 COPY ./stress /stress
+COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml

 CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \
     dpkg -i package_folder/clickhouse-server_*.deb; \
diff --git a/docker/test/stress/log_queries.xml b/docker/test/stress/log_queries.xml
new file mode 100644
index 00000000000..25261072ade
--- /dev/null
+++ b/docker/test/stress/log_queries.xml
@@ -0,0 +1,7 @@
+<yandex>
+    <profiles>
+        <default>
+            <log_queries>1</log_queries>
+        </default>
+    </profiles>
+</yandex>

From 4676009c8e2e1fcdff5df3aef834f217c7462fc4 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 9 Jan 2019 19:13:03 +0300
Subject: [PATCH 09/56] PyYAML==4.2b1

---
 docs/tools/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt
index e8680473939..85cd355dbdc 100644
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@@ -18,7 +18,7 @@ mkdocs==1.0.4
 Pygments==2.2.0
 python-slugify==1.2.6
 pytz==2017.3
-PyYAML==3.12
+PyYAML==4.2b1
 recommonmark==0.4.0
 requests==2.21.0
 singledispatch==3.4.0.3

From 729ca697d9453bfd8345e817c91386367fcdd424 Mon Sep 17 00:00:00 2001
From: chertus
Date: Wed, 9 Jan 2019 19:16:59 +0300
Subject: [PATCH 10/56] minor SyntaxAnalyzer refactoring

---
 dbms/src/Interpreters/AnalyzedJoin.cpp        |   2 +-
 .../ExecuteScalarSubqueriesVisitor.cpp        |  10 +-
 .../Interpreters/InterpreterCreateQuery.cpp   |   2 +-
 .../Interpreters/InterpreterSelectQuery.cpp   |   8 +-
 .../src/Interpreters/MutationsInterpreter.cpp |   4 +-
 dbms/src/Interpreters/SyntaxAnalyzer.cpp      | 269 +++++++-----------
 dbms/src/Interpreters/SyntaxAnalyzer.h        |  10 +-
 .../evaluateConstantExpression.cpp            |   2 +-
 .../Interpreters/evaluateMissingDefaults.cpp  |   4 +-
 dbms/src/Storages/AlterCommands.cpp           |   4 +-
 dbms/src/Storages/MergeTree/MergeTreeData.cpp |  10 +-
 .../MergeTree/MergeTreeDataSelectExecutor.cpp |   4 +-
 dbms/src/Storages/StorageDistributed.cpp      |   2 +-
 dbms/src/Storages/StorageMerge.cpp            |   2 +-
 dbms/src/Storages/VirtualColumnUtils.cpp      |   2 +-
 .../transformQueryForExternalDatabase.cpp     |   4 +-
 16 files changed, 145 insertions(+), 194 deletions(-)

diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp
index 83dfebcd852..c39ea9c9495 100644
--- a/dbms/src/Interpreters/AnalyzedJoin.cpp
+++ b/dbms/src/Interpreters/AnalyzedJoin.cpp
@@ -48,7 +48,7 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions(
         source_column_names.emplace_back(column.name_and_type);

     ASTPtr query = expression_list;
-    auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, source_column_names, required_columns);
+    auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_column_names, required_columns);
     ExpressionAnalyzer analyzer(query, syntax_result, context, {}, required_columns);
     auto joined_block_actions = analyzer.getActions(false);

diff --git
a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 59a8c084b9e..dcf2f0b051d 100644 --- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -37,7 +37,7 @@ static ASTPtr addTypeConversion(std::unique_ptr && ast, const String return res; } -bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr &) +bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { /// Processed if (typeid_cast(node.get()) || @@ -48,6 +48,14 @@ bool ExecuteScalarSubqueriesMatcher::needChildVisit(ASTPtr & node, const ASTPtr if (typeid_cast(node.get())) return false; + if (typeid_cast(node.get())) + { + /// Do not go to FROM, JOIN, UNION. + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + return false; + } + return true; } diff --git a/dbms/src/Interpreters/InterpreterCreateQuery.cpp b/dbms/src/Interpreters/InterpreterCreateQuery.cpp index 3bce5755a6c..8cc3d1b88c1 100644 --- a/dbms/src/Interpreters/InterpreterCreateQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCreateQuery.cpp @@ -242,7 +242,7 @@ static ColumnsDeclarationAndModifiers parseColumns(const ASTExpressionList & col /// set missing types and wrap default_expression's in a conversion-function if necessary if (!defaulted_columns.empty()) { - auto syntax_analyzer_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, columns); + auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(default_expr_list, columns); const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); const auto block = actions->getSampleBlock(); diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 0a9cb78d5f7..991f31afcdc 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -184,8 +184,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) table_lock = storage->lockStructure(false); - syntax_analyzer_result = SyntaxAnalyzer(context, storage) - .analyze(query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, subquery_depth); + syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze( + query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage); query_analyzer = std::make_unique( query_ptr, syntax_analyzer_result, context, NamesAndTypesList(), required_result_column_names, subquery_depth, !only_analyze); @@ -792,7 +792,7 @@ void InterpreterSelectQuery::executeFetchColumns( } auto additional_source_columns_set = ext::map(additional_source_columns, [] (const auto & it) { return it.name; }); - auto syntax_result = SyntaxAnalyzer(context, storage).analyze(required_columns_expr_list, additional_source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(required_columns_expr_list, additional_source_columns, {}, storage); alias_actions = ExpressionAnalyzer(required_columns_expr_list, syntax_result, context).getActions(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
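    /// Call-shape sketch for the refactoring above: the storage argument
    /// moved from the SyntaxAnalyzer constructor into analyze(), e.g.
    ///
    ///     SyntaxAnalyzer(context).analyze(expr_list, source_columns, /* required_result_columns = */ {}, storage);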
@@ -829,7 +829,7 @@ void InterpreterSelectQuery::executeFetchColumns( } prewhere_info->prewhere_actions = std::move(new_actions); - auto analyzed_result = SyntaxAnalyzer(context, {}).analyze(required_prewhere_columns_expr_list, storage->getColumns().getAllPhysical()); + auto analyzed_result = SyntaxAnalyzer(context).analyze(required_prewhere_columns_expr_list, storage->getColumns().getAllPhysical()); prewhere_info->alias_actions = ExpressionAnalyzer(required_prewhere_columns_expr_list, analyzed_result, context) .getActions(true, false); diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp index d59fc811338..fbf64f081f7 100644 --- a/dbms/src/Interpreters/MutationsInterpreter.cpp +++ b/dbms/src/Interpreters/MutationsInterpreter.cpp @@ -194,7 +194,7 @@ void MutationsInterpreter::prepare(bool dry_run) if (col_default.kind == ColumnDefaultKind::Materialized) { auto query = col_default.expression->clone(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); ExpressionAnalyzer analyzer(query, syntax_result, context); for (const String & dependency : analyzer.getRequiredSourceColumns()) { @@ -301,7 +301,7 @@ void MutationsInterpreter::prepare(bool dry_run) for (const String & column : stage.output_columns) all_asts->children.push_back(std::make_shared(column)); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(all_asts, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(all_asts, all_columns); stage.analyzer = std::make_unique(all_asts, syntax_result, context); ExpressionActionsChain & actions_chain = stage.expressions_chain; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 3d9a7f55df3..4bce5ce7777 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -42,141 +42,6 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; } -namespace -{ - -using LogAST = DebugASTLog; /// set to true to enable logs -using Aliases = SyntaxAnalyzerResult::Aliases; - -/// Add columns from storage to source_columns list. -void collectSourceColumns(ASTSelectQuery * select_query, const Context & context, - StoragePtr & storage, NamesAndTypesList & source_columns); - -/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. -void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context); - -/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases. -void normalizeTree( - ASTPtr & query, - SyntaxAnalyzerResult & result, - const Names & source_columns, - const NameSet & source_columns_set, - const StoragePtr & storage, - const Context & context, - const ASTSelectQuery * select_query, - bool asterisk_left_columns_only); - -/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. -/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. -void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns); - -/// Replacing scalar subqueries with constant values. 
-void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query, - const Context & context, size_t subquery_depth); - -/// Remove Function_if AST if condition is constant. -void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases); - -/// Eliminates injective function calls and constant expressions from group by statement. -void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context); - -/// Remove duplicate items from ORDER BY. -void optimizeOrderBy(const ASTSelectQuery * select_query); - -/// Remove duplicate items from LIMIT BY. -void optimizeLimitBy(const ASTSelectQuery * select_query); - -/// Remove duplicated columns from USING(...). -void optimizeUsing(const ASTSelectQuery * select_query); - -void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query, - const Names & source_columns, const NameSet & source_columns_set); - -/// Parse JOIN ON expression and collect ASTs for joined columns. -void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context); - -/// Find the columns that are obtained by JOIN. -void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context); -} - -SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( - ASTPtr & query, - const NamesAndTypesList & source_columns_, - const Names & required_result_columns, - size_t subquery_depth) const -{ - SyntaxAnalyzerResult result; - result.storage = storage; - result.source_columns = source_columns_; - auto * select_query = typeid_cast(query.get()); - collectSourceColumns(select_query, context, result.storage, result.source_columns); - - const auto & settings = context.getSettingsRef(); - - Names source_columns_list; - source_columns_list.reserve(result.source_columns.size()); - for (const auto & type_name : result.source_columns) - source_columns_list.emplace_back(type_name.name); - NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); - - translateQualifiedNames(query, select_query, source_columns_set, context); - - /// Depending on the user's profile, check for the execution rights - /// distributed subqueries inside the IN or JOIN sections and process these subqueries. - InJoinSubqueriesPreprocessor(context).process(select_query); - - /// Optimizes logical expressions. - LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); - - /// Creates a dictionary `aliases`: alias -> ASTPtr - { - LogAST log; - QueryAliasesVisitor::Data query_aliases_data{result.aliases}; - QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); - } - - /// Common subexpression elimination. Rewrite rules. - normalizeTree(query, result, source_columns_list, source_columns_set, result.storage, - context, select_query, settings.asterisk_left_columns_only != 0); - - /// Remove unneeded columns according to 'required_result_columns'. - /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. - /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) - /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. 
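// For orientation: the relative order of the rewrite passes is preserved by this
// refactoring. A condensed sketch of the pipeline as analyze() runs it:
//
//     normalizeTree(...);                          // expand aliases and asterisks first
//     removeUnneededColumnsFromSelectClause(...);  // prune before the heavier passes
//     executeScalarSubqueries(...);                // fold scalar subqueries into constants
//     optimizeIfWithConstantCondition(...);        // then collapse if() over those constants
//     optimizeGroupBy/OrderBy/LimitBy/Using(...);  // clause-level cleanups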
- removeUnneededColumnsFromSelectClause(select_query, required_result_columns); - - /// Executing scalar subqueries - replacing them with constant values. - executeScalarSubqueries(query, select_query, context, subquery_depth); - - /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. - optimizeIfWithConstantCondition(query, result.aliases); - - /// GROUP BY injective function elimination. - optimizeGroupBy(select_query, source_columns_set, context); - - /// Remove duplicate items from ORDER BY. - optimizeOrderBy(select_query); - - // Remove duplicated elements from LIMIT BY clause. - optimizeLimitBy(select_query); - - /// Remove duplicated columns from USING(...). - optimizeUsing(select_query); - - /// array_join_alias_to_name, array_join_result_to_source. - getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); - - /// Push the predicate expression down to the subqueries. - result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); - - collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); - - return std::make_shared(result); -} - void removeDuplicateColumns(NamesAndTypesList & columns) { std::set names; @@ -192,15 +57,12 @@ void removeDuplicateColumns(NamesAndTypesList & columns) namespace { -void collectSourceColumns(ASTSelectQuery * select_query, const Context & context, - StoragePtr & storage, NamesAndTypesList & source_columns) -{ - if (!storage && select_query) - { - if (auto db_and_table = getDatabaseAndTable(*select_query, 0)) - storage = context.tryGetTable(db_and_table->database, db_and_table->table); - } +using LogAST = DebugASTLog; /// set to true to enable logs + +/// Add columns from storage to source_columns list. +void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, NamesAndTypesList & source_columns) +{ if (storage) { auto physical_columns = storage->getColumns().getAllPhysical(); @@ -219,6 +81,7 @@ void collectSourceColumns(ASTSelectQuery * select_query, const Context & context removeDuplicateColumns(source_columns); } +/// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { @@ -233,6 +96,7 @@ void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, visitor.visit(query); } +/// For star nodes(`*`), expand them to a list of all columns. For literal nodes, substitute aliases. void normalizeTree( ASTPtr & query, SyntaxAnalyzerResult & result, @@ -297,6 +161,8 @@ bool hasArrayJoin(const ASTPtr & ast) return false; } +/// Sometimes we have to calculate more columns in SELECT clause than will be returned from query. +/// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns) { if (!select_query) @@ -335,29 +201,12 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, elements = std::move(new_elements); } -void executeScalarSubqueries(ASTPtr & query, const ASTSelectQuery * select_query, - const Context & context, size_t subquery_depth) +/// Replacing scalar subqueries with constant values. 
+void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t subquery_depth) { LogAST log; - - if (!select_query) - { - ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; - ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); - } - else - { - for (auto & child : query->children) - { - /// Do not go to FROM, JOIN, UNION. - if (!typeid_cast(child.get()) - && !typeid_cast(child.get())) - { - ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; - ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(child); - } - } - } + ExecuteScalarSubqueriesVisitor::Data visitor_data{context, subquery_depth}; + ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) @@ -394,7 +243,8 @@ bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) return false; } -void optimizeIfWithConstantCondition(ASTPtr & current_ast, Aliases & aliases) +/// Remove Function_if AST if condition is constant. +void optimizeIfWithConstantCondition(ASTPtr & current_ast, SyntaxAnalyzerResult::Aliases & aliases) { if (!current_ast) return; @@ -491,6 +341,7 @@ const std::unordered_set possibly_injective_function_names "dictGetDateTime" }; +/// Eliminates injective function calls and constant expressions from group by statement. void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { if (!(select_query && select_query->group_expression_list)) @@ -594,6 +445,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum } } +/// Remove duplicate items from ORDER BY. void optimizeOrderBy(const ASTSelectQuery * select_query) { if (!(select_query && select_query->order_expression_list)) @@ -620,6 +472,7 @@ void optimizeOrderBy(const ASTSelectQuery * select_query) elems = unique_elems; } +/// Remove duplicate items from LIMIT BY. void optimizeLimitBy(const ASTSelectQuery * select_query) { if (!(select_query && select_query->limit_by_expression_list)) @@ -641,6 +494,7 @@ void optimizeLimitBy(const ASTSelectQuery * select_query) elems = unique_elems; } +/// Remove duplicated columns from USING(...). void optimizeUsing(const ASTSelectQuery * select_query) { if (!select_query) @@ -740,6 +594,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const } } +/// Parse JOIN ON expression and collect ASTs for joined columns. void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { @@ -899,6 +754,7 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS add_columns_from_equals_expr(table_join.on_expression); } +/// Find the columns that are obtained by JOIN. 
void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { @@ -969,4 +825,87 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s } + +SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( + ASTPtr & query, + const NamesAndTypesList & source_columns_, + const Names & required_result_columns, + StoragePtr storage) const +{ + auto * select_query = typeid_cast(query.get()); + if (!storage && select_query) + { + if (auto db_and_table = getDatabaseAndTable(*select_query, 0)) + storage = context.tryGetTable(db_and_table->database, db_and_table->table); + } + + SyntaxAnalyzerResult result; + result.storage = storage; + result.source_columns = source_columns_; + + collectSourceColumns(select_query, result.storage, result.source_columns); + + const auto & settings = context.getSettingsRef(); + + Names source_columns_list; + source_columns_list.reserve(result.source_columns.size()); + for (const auto & type_name : result.source_columns) + source_columns_list.emplace_back(type_name.name); + NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); + + translateQualifiedNames(query, select_query, source_columns_set, context); + + /// Depending on the user's profile, check for the execution rights + /// distributed subqueries inside the IN or JOIN sections and process these subqueries. + InJoinSubqueriesPreprocessor(context).process(select_query); + + /// Optimizes logical expressions. + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + + /// Creates a dictionary `aliases`: alias -> ASTPtr + { + LogAST log; + QueryAliasesVisitor::Data query_aliases_data{result.aliases}; + QueryAliasesVisitor(query_aliases_data, log.stream()).visit(query); + } + + /// Common subexpression elimination. Rewrite rules. + normalizeTree(query, result, source_columns_list, source_columns_set, result.storage, + context, select_query, settings.asterisk_left_columns_only != 0); + + /// Remove unneeded columns according to 'required_result_columns'. + /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. + /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) + /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. + removeUnneededColumnsFromSelectClause(select_query, required_result_columns); + + /// Executing scalar subqueries - replacing them with constant values. + executeScalarSubqueries(query, context, subquery_depth); + + /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. + optimizeIfWithConstantCondition(query, result.aliases); + + /// GROUP BY injective function elimination. + optimizeGroupBy(select_query, source_columns_set, context); + + /// Remove duplicate items from ORDER BY. + optimizeOrderBy(select_query); + + /// Remove duplicated elements from LIMIT BY clause. + optimizeLimitBy(select_query); + + /// Remove duplicated columns from USING(...). + optimizeUsing(select_query); + + /// array_join_alias_to_name, array_join_result_to_source. + getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); + + /// Push the predicate expression down to the subqueries. 
+ result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); + + collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); + + return std::make_shared(result); +} + } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 38595917917..5500823b3c2 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -54,16 +54,20 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr; class SyntaxAnalyzer { public: - SyntaxAnalyzer(const Context & context, StoragePtr storage) : context(context), storage(std::move(storage)) {} + SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0) + : context(context_) + , subquery_depth(subquery_depth_) + {} SyntaxAnalyzerResultPtr analyze( ASTPtr & query, const NamesAndTypesList & source_columns_, const Names & required_result_columns = {}, - size_t subquery_depth = 0) const; + StoragePtr storage = {}) const; +private: const Context & context; - StoragePtr storage; + size_t subquery_depth; }; } diff --git a/dbms/src/Interpreters/evaluateConstantExpression.cpp b/dbms/src/Interpreters/evaluateConstantExpression.cpp index 29753a4c637..8f96160186d 100644 --- a/dbms/src/Interpreters/evaluateConstantExpression.cpp +++ b/dbms/src/Interpreters/evaluateConstantExpression.cpp @@ -31,7 +31,7 @@ std::pair> evaluateConstantExpression(co { NamesAndTypesList source_columns = {{ "_dummy", std::make_shared() }}; auto ast = node->clone(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(ast, source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns); ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions(); /// There must be at least one column in the block so that it knows the number of rows. 
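// The call-site migration implied by the new interface, in sketch form
// (subquery_depth moves into the constructor, storage into analyze()):
//
//     before: SyntaxAnalyzer(context, storage).analyze(query, columns, required_columns, subquery_depth);
//     after:  SyntaxAnalyzer(context, subquery_depth).analyze(query, columns, required_columns, storage);
//
// so the common no-storage, depth-0 case shrinks to:
//
//     auto syntax_result = SyntaxAnalyzer(context).analyze(ast, source_columns);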
diff --git a/dbms/src/Interpreters/evaluateMissingDefaults.cpp b/dbms/src/Interpreters/evaluateMissingDefaults.cpp index 33dce42ab8e..9a6884b25e3 100644 --- a/dbms/src/Interpreters/evaluateMissingDefaults.cpp +++ b/dbms/src/Interpreters/evaluateMissingDefaults.cpp @@ -48,7 +48,7 @@ void evaluateMissingDefaults(Block & block, if (!save_unneeded_columns) { - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); + auto syntax_result = SyntaxAnalyzer(context).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(block); return; } @@ -57,7 +57,7 @@ void evaluateMissingDefaults(Block & block, * we are going to operate on a copy instead of the original block */ Block copy_block{block}; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(default_expr_list, block.getNamesAndTypesList()); + auto syntax_result = SyntaxAnalyzer(context).analyze(default_expr_list, block.getNamesAndTypesList()); ExpressionAnalyzer{default_expr_list, syntax_result, context}.getActions(true)->execute(copy_block); /// move evaluated columns to the original block, materializing them at the same time diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index 332ccfde3f0..b5fbe0f3314 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -398,7 +398,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) { const auto & default_expression = default_column.second.expression; ASTPtr query = default_expression; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); const auto actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true); const auto required_columns = actions->getRequiredColumns(); @@ -473,7 +473,7 @@ void AlterCommands::validate(const IStorage & table, const Context & context) } ASTPtr query = default_expr_list; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, all_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, all_columns); const auto actions = ExpressionAnalyzer(query, syntax_result, context).getActions(true); const auto block = actions->getSampleBlock(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 119b0861fbc..30c38282d76 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -126,7 +126,7 @@ MergeTreeData::MergeTreeData( && !attach && !settings.compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. 
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - auto syntax = SyntaxAnalyzer(global_context, {}).analyze(sample_by_ast, getColumns().getAllPhysical()); + auto syntax = SyntaxAnalyzer(global_context).analyze(sample_by_ast, getColumns().getAllPhysical()); columns_required_for_sampling = ExpressionAnalyzer(sample_by_ast, syntax, global_context) .getRequiredSourceColumns(); } @@ -282,7 +282,7 @@ void MergeTreeData::setPrimaryKeyAndColumns( if (!added_key_column_expr_list->children.empty()) { - auto syntax = SyntaxAnalyzer(global_context, {}).analyze(added_key_column_expr_list, all_columns); + auto syntax = SyntaxAnalyzer(global_context).analyze(added_key_column_expr_list, all_columns); Names used_columns = ExpressionAnalyzer(added_key_column_expr_list, syntax, global_context) .getRequiredSourceColumns(); @@ -305,7 +305,7 @@ void MergeTreeData::setPrimaryKeyAndColumns( } } - auto new_sorting_key_syntax = SyntaxAnalyzer(global_context, {}).analyze(new_sorting_key_expr_list, all_columns); + auto new_sorting_key_syntax = SyntaxAnalyzer(global_context).analyze(new_sorting_key_expr_list, all_columns); auto new_sorting_key_expr = ExpressionAnalyzer(new_sorting_key_expr_list, new_sorting_key_syntax, global_context) .getActions(false); auto new_sorting_key_sample = @@ -314,7 +314,7 @@ void MergeTreeData::setPrimaryKeyAndColumns( checkKeyExpression(*new_sorting_key_expr, new_sorting_key_sample, "Sorting"); - auto new_primary_key_syntax = SyntaxAnalyzer(global_context, {}).analyze(new_primary_key_expr_list, all_columns); + auto new_primary_key_syntax = SyntaxAnalyzer(global_context).analyze(new_primary_key_expr_list, all_columns); auto new_primary_key_expr = ExpressionAnalyzer(new_primary_key_expr_list, new_primary_key_syntax, global_context) .getActions(false); @@ -376,7 +376,7 @@ void MergeTreeData::initPartitionKey() return; { - auto syntax_result = SyntaxAnalyzer(global_context, {}).analyze(partition_key_expr_list, getColumns().getAllPhysical()); + auto syntax_result = SyntaxAnalyzer(global_context).analyze(partition_key_expr_list, getColumns().getAllPhysical()); partition_key_expr = ExpressionAnalyzer(partition_key_expr_list, syntax_result, global_context).getActions(false); } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index dd5a35ad710..01fb3169013 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -488,7 +488,7 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts( } ASTPtr query = filter_function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, available_real_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, available_real_columns); filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActions(false); /// Add columns needed for `sample_by_ast` to `column_names_to_read`. 
@@ -848,7 +848,7 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( arguments->children.push_back(one); ASTPtr query = function; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, data.getColumns().getAllPhysical()); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, data.getColumns().getAllPhysical()); out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false); out_column = function->getColumnName(); } diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index e1baea8f9eb..8f4f31d458c 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -170,7 +170,7 @@ StorageDistributed::~StorageDistributed() = default; static ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, const Context & context, NamesAndTypesList columns, bool project) { ASTPtr query = sharding_key; - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, columns); return ExpressionAnalyzer(query, syntax_result, context).getActions(project); } diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp index 193ca0ebdbb..931105df1ef 100644 --- a/dbms/src/Storages/StorageMerge.cpp +++ b/dbms/src/Storages/StorageMerge.cpp @@ -452,7 +452,7 @@ void StorageMerge::convertingSourceStream(const Block & header, const Context & NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList(); NameAndTypePair virtual_column = getColumn("_table"); source_columns.insert(source_columns.end(), virtual_column); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(where_expression, source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns); ExpressionActionsPtr actions = ExpressionAnalyzer{where_expression, syntax_result, context}.getActions(false, false); Names required_columns = actions->getRequiredColumns(); diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index d78a7a36727..6ce3e58cc75 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -157,7 +157,7 @@ void filterBlockWithQuery(const ASTPtr & query, Block & block, const Context & c return; /// Let's analyze and calculate the expression. 
- auto syntax_result = SyntaxAnalyzer(context, {}).analyze(expression_ast, block.getNamesAndTypesList()); + auto syntax_result = SyntaxAnalyzer(context).analyze(expression_ast, block.getNamesAndTypesList()); ExpressionAnalyzer analyzer(expression_ast, syntax_result, context); ExpressionActionsPtr actions = analyzer.getActions(false); diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index f37e51b714e..0131d9f2162 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -28,7 +28,7 @@ static void replaceConstFunction(IAST & node, const Context & context, const Nam { NamesAndTypesList source_columns = all_columns; ASTPtr query = function->ptr(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(query, source_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(query, source_columns); auto result_block = KeyCondition::getBlockWithConstants(query, syntax_result, context); if (!result_block.has(child->getColumnName())) return; @@ -92,7 +92,7 @@ String transformQueryForExternalDatabase( const Context & context) { auto clone_query = query.clone(); - auto syntax_result = SyntaxAnalyzer(context, {}).analyze(clone_query, available_columns); + auto syntax_result = SyntaxAnalyzer(context).analyze(clone_query, available_columns); ExpressionAnalyzer analyzer(clone_query, syntax_result, context); const Names & used_columns = analyzer.getRequiredSourceColumns(); From 99330c77adb3de1a82ebab48643f2e613047859a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 9 Jan 2019 19:23:20 +0300 Subject: [PATCH 11/56] Enable part_log in docker test images by default --- docker/test/stateless/Dockerfile | 1 + docker/test/stateless/part_log.xml | 8 ++++++++ docker/test/stress/Dockerfile | 1 + docker/test/stress/part_log.xml | 8 ++++++++ 4 files changed, 18 insertions(+) create mode 100644 docker/test/stateless/part_log.xml create mode 100644 docker/test/stress/part_log.xml diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index bc81c298553..90b4068fb93 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -25,6 +25,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY zookeeper.xml /etc/clickhouse-server/config.d/zookeeper.xml COPY listen.xml /etc/clickhouse-server/config.d/listen.xml +COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ diff --git a/docker/test/stateless/part_log.xml b/docker/test/stateless/part_log.xml new file mode 100644 index 00000000000..6c6fc9c6982 --- /dev/null +++ b/docker/test/stateless/part_log.xml @@ -0,0 +1,8 @@ + + + system + part_log
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </part_log>
+</yandex>
diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 80101688118..8f2524930c1 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -21,6 +21,7 @@ RUN apt-get update -y \ COPY ./stress /stress COPY log_queries.xml /etc/clickhouse-server/users.d/log_queries.xml +COPY part_log.xml /etc/clickhouse-server/config.d/part_log.xml CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-server_*.deb; \ diff --git a/docker/test/stress/part_log.xml b/docker/test/stress/part_log.xml new file mode 100644 index 00000000000..6c6fc9c6982 --- /dev/null +++ b/docker/test/stress/part_log.xml @@ -0,0 +1,8 @@ + + + system + part_log
+
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </part_log>
+</yandex>
From c93b54a88d80cd0d1b42ba0253cfbfa52cec4b47 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 19:32:34 +0300 Subject: [PATCH 12/56] Removed obsolete tweak, because KDevelop has migrated to clang parser long time ago --- libs/libcommon/include/common/Types.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/libs/libcommon/include/common/Types.h b/libs/libcommon/include/common/Types.h index d2fdb0a8343..70c9c3d2f3c 100644 --- a/libs/libcommon/include/common/Types.h +++ b/libs/libcommon/include/common/Types.h @@ -1,6 +1,5 @@ #pragma once #include -#include using Int8 = int8_t; using Int16 = int16_t; @@ -11,9 +10,3 @@ using UInt8 = uint8_t; using UInt16 = uint16_t; using UInt32 = uint32_t; using UInt64 = uint64_t; - -/// Workaround for the issue, that KDevelop doesn't see time_t and size_t types (for syntax highlight). -#ifdef IN_KDEVELOP_PARSER - using time_t = Int64; - using size_t = UInt64; -#endif From 7ba268049aca6eb21e327a5bb7de05ede90ffc2f Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 9 Jan 2019 20:06:40 +0300 Subject: [PATCH 13/56] extract OptimizeIfWithConstantConditionVisitor from SyntaxAnalyzer --- ...OptimizeIfWithConstantConditionVisitor.cpp | 108 ++++++++++++++++++ .../OptimizeIfWithConstantConditionVisitor.h | 27 +++++ dbms/src/Interpreters/SyntaxAnalyzer.cpp | 97 +--------------- 3 files changed, 137 insertions(+), 95 deletions(-) create mode 100644 dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp create mode 100644 dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h diff --git a/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp new file mode 100644 index 00000000000..e73a734ab16 --- /dev/null +++ b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -0,0 +1,108 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) +{ + /// numeric constant in condition + if (const ASTLiteral * literal = typeid_cast(condition.get())) + { + if (literal->value.getType() == Field::Types::Int64 || + literal->value.getType() == Field::Types::UInt64) + { + value = literal->value.get(); + return true; + } + } + + /// cast of numeric constant in condition to UInt8 + if (const ASTFunction * function = typeid_cast(condition.get())) + { + if (function->name == "CAST") + { + if (ASTExpressionList * expr_list = typeid_cast(function->arguments.get())) + { + const ASTPtr & type_ast = expr_list->children.at(1); + if (const ASTLiteral * type_literal = typeid_cast(type_ast.get())) + { + if (type_literal->value.getType() == Field::Types::String && + type_literal->value.get() == "UInt8") + return tryExtractConstValueFromCondition(expr_list->children.at(0), value); + } + } + } + } + + return false; +} + +void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) +{ + if (!current_ast) + return; + + for (ASTPtr & child : current_ast->children) + { + auto * function_node = typeid_cast(child.get()); + if (!function_node || function_node->name != "if") + { + visit(child); + continue; + } + + visit(function_node->arguments); + auto * args = typeid_cast(function_node->arguments.get()); + + if (args->children.size() != 3) + throw Exception("Wrong number of arguments for function 'if' (" + toString(args->children.size()) + " instead of 3)", + 
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTPtr condition_expr = args->children[0]; + ASTPtr then_expr = args->children[1]; + ASTPtr else_expr = args->children[2]; + + bool condition; + if (tryExtractConstValueFromCondition(condition_expr, condition)) + { + ASTPtr replace_ast = condition ? then_expr : else_expr; + ASTPtr child_copy = child; + String replace_alias = replace_ast->tryGetAlias(); + String if_alias = child->tryGetAlias(); + + if (replace_alias.empty()) + { + replace_ast->setAlias(if_alias); + child = replace_ast; + } + else + { + /// Only copy of one node is required here. + /// But IAST has only method for deep copy of subtree. + /// This can be a reason of performance degradation in case of deep queries. + ASTPtr replace_ast_deep_copy = replace_ast->clone(); + replace_ast_deep_copy->setAlias(if_alias); + child = replace_ast_deep_copy; + } + + if (!if_alias.empty()) + { + auto alias_it = aliases.find(if_alias); + if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) + alias_it->second = child; + } + } + } +} + +} diff --git a/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h new file mode 100644 index 00000000000..ee738ec05e2 --- /dev/null +++ b/dbms/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +#include + +namespace DB +{ + +/// It removes Function_if node from AST if condition is constant. +/// TODO: rewrite with InDepthNodeVisitor +class OptimizeIfWithConstantConditionVisitor +{ +public: + using Aliases = std::unordered_map; + + OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + : aliases(aliases_) + {} + + void visit(ASTPtr & ast); + +private: + Aliases & aliases; +}; + +} diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 4bce5ce7777..cd156076b79 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -34,7 +35,6 @@ namespace DB namespace ErrorCodes { - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ALIAS_REQUIRED; extern const int MULTIPLE_EXPRESSIONS_FOR_ALIAS; extern const int EMPTY_NESTED_TABLE; @@ -209,99 +209,6 @@ void executeScalarSubqueries(ASTPtr & query, const Context & context, size_t sub ExecuteScalarSubqueriesVisitor(visitor_data, log.stream()).visit(query); } -bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & value) -{ - /// numeric constant in condition - if (const ASTLiteral * literal = typeid_cast(condition.get())) - { - if (literal->value.getType() == Field::Types::Int64 || - literal->value.getType() == Field::Types::UInt64) - { - value = literal->value.get(); - return true; - } - } - - /// cast of numeric constant in condition to UInt8 - if (const ASTFunction * function = typeid_cast(condition.get())) - { - if (function->name == "CAST") - { - if (ASTExpressionList * expr_list = typeid_cast(function->arguments.get())) - { - const ASTPtr & type_ast = expr_list->children.at(1); - if (const ASTLiteral * type_literal = typeid_cast(type_ast.get())) - { - if (type_literal->value.getType() == Field::Types::String && - type_literal->value.get() == "UInt8") - return tryExtractConstValueFromCondition(expr_list->children.at(0), value); - } - } - } - } - - return false; -} - -/// Remove Function_if AST if condition is constant. 
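// The extracted visitor keeps the old behavior. In sketch form, a constant
// condition collapses the if() call to the surviving branch (the expressions
// are illustrative, not taken from the patch):
//
//     if(1, x, y)                -->  x
//     if(CAST(0 AS UInt8), x, y) -->  y
//
// and the single call site in SyntaxAnalyzer::analyze becomes:
//
//     OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query);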
-void optimizeIfWithConstantCondition(ASTPtr & current_ast, SyntaxAnalyzerResult::Aliases & aliases) -{ - if (!current_ast) - return; - - for (ASTPtr & child : current_ast->children) - { - auto * function_node = typeid_cast(child.get()); - if (!function_node || function_node->name != "if") - { - optimizeIfWithConstantCondition(child, aliases); - continue; - } - - optimizeIfWithConstantCondition(function_node->arguments, aliases); - auto * args = typeid_cast(function_node->arguments.get()); - - if (args->children.size() != 3) - throw Exception("Wrong number of arguments for function 'if' (" + toString(args->children.size()) + " instead of 3)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - ASTPtr condition_expr = args->children[0]; - ASTPtr then_expr = args->children[1]; - ASTPtr else_expr = args->children[2]; - - bool condition; - if (tryExtractConstValueFromCondition(condition_expr, condition)) - { - ASTPtr replace_ast = condition ? then_expr : else_expr; - ASTPtr child_copy = child; - String replace_alias = replace_ast->tryGetAlias(); - String if_alias = child->tryGetAlias(); - - if (replace_alias.empty()) - { - replace_ast->setAlias(if_alias); - child = replace_ast; - } - else - { - /// Only copy of one node is required here. - /// But IAST has only method for deep copy of subtree. - /// This can be a reason of performance degradation in case of deep queries. - ASTPtr replace_ast_deep_copy = replace_ast->clone(); - replace_ast_deep_copy->setAlias(if_alias); - child = replace_ast_deep_copy; - } - - if (!if_alias.empty()) - { - auto alias_it = aliases.find(if_alias); - if (alias_it != aliases.end() && alias_it->second.get() == child_copy.get()) - alias_it->second = child; - } - } - } -} - /** Calls to these functions in the GROUP BY statement would be * replaced by their immediate argument. */ @@ -883,7 +790,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( executeScalarSubqueries(query, context, subquery_depth); /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. - optimizeIfWithConstantCondition(query, result.aliases); + OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query); /// GROUP BY injective function elimination. optimizeGroupBy(select_query, source_columns_set, context); From 4712fd668ea83e5d3f445aa58e7d490f358705a3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Jan 2019 20:31:30 +0300 Subject: [PATCH 14/56] Addition to prev. 
revision --- dbms/src/IO/Progress.h | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/IO/Progress.h b/dbms/src/IO/Progress.h index c21196befac..7dca03f03c2 100644 --- a/dbms/src/IO/Progress.h +++ b/dbms/src/IO/Progress.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include From c460a270daf426f4be6bc37453ac18705aeb240b Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 9 Jan 2019 20:40:26 +0300 Subject: [PATCH 15/56] enlight when SyntaxAnalyzer affects only selects --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 67 +++++++++++------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index cd156076b79..364cf221f35 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -85,7 +85,7 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { - if (!select_query || !select_query->tables || select_query->tables->children.empty()) + if (!select_query->tables || select_query->tables->children.empty()) return; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); @@ -165,9 +165,6 @@ bool hasArrayJoin(const ASTPtr & ast) /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result. void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns) { - if (!select_query) - return; - if (required_result_columns.empty()) return; @@ -251,7 +248,7 @@ const std::unordered_set possibly_injective_function_names /// Eliminates injective function calls and constant expressions from group by statement. void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { - if (!(select_query && select_query->group_expression_list)) + if (!select_query->group_expression_list) return; const auto is_literal = [] (const ASTPtr & ast) @@ -355,7 +352,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, const NameSet & source_colum /// Remove duplicate items from ORDER BY. void optimizeOrderBy(const ASTSelectQuery * select_query) { - if (!(select_query && select_query->order_expression_list)) + if (!select_query->order_expression_list) return; /// Make unique sorting conditions. @@ -382,7 +379,7 @@ void optimizeOrderBy(const ASTSelectQuery * select_query) /// Remove duplicate items from LIMIT BY. void optimizeLimitBy(const ASTSelectQuery * select_query) { - if (!(select_query && select_query->limit_by_expression_list)) + if (!select_query->limit_by_expression_list) return; std::set elems_set; @@ -404,9 +401,6 @@ void optimizeLimitBy(const ASTSelectQuery * select_query) /// Remove duplicated columns from USING(...). 
void optimizeUsing(const ASTSelectQuery * select_query) { - if (!select_query) - return; - auto node = const_cast(select_query->join()); if (!node) return; @@ -437,9 +431,6 @@ void optimizeUsing(const ASTSelectQuery * select_query) void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query, const Names & source_columns, const NameSet & source_columns_set) { - if (!select_query) - return; - ASTPtr array_join_expression_list = select_query->array_join_expression_list(); if (array_join_expression_list) { @@ -665,9 +656,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, const NameSet & source_columns, const Context & context) { - if (!select_query) - return; - const ASTTablesInSelectQueryElement * node = select_query->join(); if (!node) @@ -760,14 +748,17 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( source_columns_list.emplace_back(type_name.name); NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); - translateQualifiedNames(query, select_query, source_columns_set, context); + if (select_query) + { + translateQualifiedNames(query, select_query, source_columns_set, context); - /// Depending on the user's profile, check for the execution rights - /// distributed subqueries inside the IN or JOIN sections and process these subqueries. - InJoinSubqueriesPreprocessor(context).process(select_query); + /// Depending on the user's profile, check for the execution rights + /// distributed subqueries inside the IN or JOIN sections and process these subqueries. + InJoinSubqueriesPreprocessor(context).process(select_query); - /// Optimizes logical expressions. - LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + /// Optimizes logical expressions. + LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + } /// Creates a dictionary `aliases`: alias -> ASTPtr { @@ -784,7 +775,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) /// and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations. - removeUnneededColumnsFromSelectClause(select_query, required_result_columns); + if (select_query) + removeUnneededColumnsFromSelectClause(select_query, required_result_columns); /// Executing scalar subqueries - replacing them with constant values. executeScalarSubqueries(query, context, subquery_depth); @@ -792,25 +784,28 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Optimize if with constant condition after constants was substituted instead of sclalar subqueries. OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query); - /// GROUP BY injective function elimination. - optimizeGroupBy(select_query, source_columns_set, context); + if (select_query) + { + /// GROUP BY injective function elimination. + optimizeGroupBy(select_query, source_columns_set, context); - /// Remove duplicate items from ORDER BY. - optimizeOrderBy(select_query); + /// Remove duplicate items from ORDER BY. + optimizeOrderBy(select_query); - /// Remove duplicated elements from LIMIT BY clause. 
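// The pattern throughout this commit, in sketch form: the null check moves out
// of each helper into the one caller, so the helpers may assume a select query.
//
//     before: void optimizeLimitBy(const ASTSelectQuery * select_query)
//             { if (!(select_query && select_query->limit_by_expression_list)) return; ... }
//
//     after:  if (select_query)                    // checked once in SyntaxAnalyzer::analyze
//                 optimizeLimitBy(select_query);   // helper checks only its own clause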
- optimizeLimitBy(select_query); + /// Remove duplicated elements from LIMIT BY clause. + optimizeLimitBy(select_query); - /// Remove duplicated columns from USING(...). - optimizeUsing(select_query); + /// Remove duplicated columns from USING(...). + optimizeUsing(select_query); - /// array_join_alias_to_name, array_join_result_to_source. - getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); + /// array_join_alias_to_name, array_join_result_to_source. + getArrayJoinedColumns(query, result, select_query, source_columns_list, source_columns_set); - /// Push the predicate expression down to the subqueries. - result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); + /// Push the predicate expression down to the subqueries. + result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); - collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); + collectJoinedColumns(result.analyzed_join, select_query, source_columns_set, context); + } return std::make_shared(result); } From 006a764df9a9f1456555c6902b238c2ddb04c684 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 9 Jan 2019 20:52:25 +0300 Subject: [PATCH 16/56] don't use pool for TaskStatsInfoGetter [#CLICKHOUSE-4209] Pool is not needed because creation of a TaskStatsInfoGetter takes an order of 10us. Also pool is harmful because created sockets are never closed. --- dbms/src/Common/CurrentThread.cpp | 3 --- dbms/src/Common/ThreadStatus.cpp | 5 +---- dbms/src/Common/ThreadStatus.h | 4 +--- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index a3919108724..b2f165e5469 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -3,7 +3,6 @@ #include "CurrentThread.h" #include #include -#include #include #include #include @@ -24,8 +23,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -SimpleObjectPool task_stats_info_getter_pool; - // Smoker's implementation to avoid thread_local usage: error: undefined symbol: __cxa_thread_atexit #if defined(ARCADIA_ROOT) struct ThreadStatusPtrHolder : ThreadStatusPtr diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index 7a321cdaeb7..0ee09d527ce 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -21,9 +21,6 @@ namespace ErrorCodes } -extern SimpleObjectPool task_stats_info_getter_pool; - - TasksStatsCounters TasksStatsCounters::current() { TasksStatsCounters res; @@ -74,7 +71,7 @@ void ThreadStatus::initPerformanceCounters() if (TaskStatsInfoGetter::checkPermissions()) { if (!taskstats_getter) - taskstats_getter = task_stats_info_getter_pool.getDefault(); + taskstats_getter = std::make_unique(); *last_taskstats = TasksStatsCounters::current(); } diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 822e1931447..3f7a91a54f0 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -2,7 +2,6 @@ #include #include -#include #include @@ -175,8 +174,7 @@ protected: std::unique_ptr last_taskstats; /// Set to non-nullptr only if we have enough capabilities. - /// We use pool because creation and destruction of TaskStatsInfoGetter objects are expensive. 
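// The replacement, in sketch form, following the commit message: constructing a
// TaskStatsInfoGetter takes on the order of 10us, so pooling buys nothing, and
// pooled getters (with the sockets they open) were never closed.
//
//     before: taskstats_getter = task_stats_info_getter_pool.getDefault();
//     after:  taskstats_getter = std::make_unique<TaskStatsInfoGetter>();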
- SimpleObjectPool::Pointer taskstats_getter; + std::unique_ptr taskstats_getter; }; } From 1dab649bf3b5f3b88de4bdd38d25994f0c93bdf4 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 9 Jan 2019 21:16:54 +0300 Subject: [PATCH 17/56] Revert "Apply upstream jemalloc patch for potential leak" --- contrib/jemalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc b/contrib/jemalloc index cd2931ad9bb..41b7372eade 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff +Subproject commit 41b7372eadee941b9164751b8d4963f915d3ceae From 64c2c3650c556dae0e449c06322301c07fbd5b87 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 9 Jan 2019 21:32:43 +0300 Subject: [PATCH 18/56] cmake: Dont cache version; macos: fix build; /release_lib.sh move to utils (#4016) --- dbms/CMakeLists.txt | 2 +- dbms/cmake/version.cmake | 20 +++++++++---------- dbms/src/Common/ShellCommand.cpp | 2 +- release | 2 +- .../release/release_lib.sh | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) rename release_lib.sh => utils/release/release_lib.sh (99%) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 40670f391bc..84c4b76d6fb 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -16,7 +16,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h) set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h) include (cmake/version.cmake) -message (STATUS "Will build ${VERSION_FULL}") +message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}") configure_file (src/Common/config.h.in ${CONFIG_COMMON}) configure_file (src/Common/config_version.h.in ${CONFIG_VERSION}) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 61bf2b2a6f9..94a10028518 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54413 CACHE STRING "") # changed manually for tests -set(VERSION_MAJOR 19 CACHE STRING "") -set(VERSION_MINOR 1 CACHE STRING "") -set(VERSION_PATCH 0 CACHE STRING "") -set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067 CACHE STRING "") -set(VERSION_DESCRIBE v19.1.0-testing CACHE STRING "") -set(VERSION_STRING 19.1.0 CACHE STRING "") +set(VERSION_REVISION 54413) +set(VERSION_MAJOR 19) +set(VERSION_MINOR 1) +set(VERSION_PATCH 0) +set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067) +set(VERSION_DESCRIBE v19.1.0-testing) +set(VERSION_STRING 19.1.0) # end of autochange set(VERSION_EXTRA "" CACHE STRING "") @@ -19,8 +19,8 @@ if (VERSION_EXTRA) string(CONCAT VERSION_STRING ${VERSION_STRING} "." 
${VERSION_EXTRA}) endif () -set (VERSION_NAME "${PROJECT_NAME}" CACHE STRING "") -set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}" CACHE STRING "") -set (VERSION_SO "${VERSION_STRING}" CACHE STRING "") +set (VERSION_NAME "${PROJECT_NAME}") +set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") +set (VERSION_SO "${VERSION_STRING}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") diff --git a/dbms/src/Common/ShellCommand.cpp b/dbms/src/Common/ShellCommand.cpp index f9835efe68d..84961292d02 100644 --- a/dbms/src/Common/ShellCommand.cpp +++ b/dbms/src/Common/ShellCommand.cpp @@ -36,7 +36,7 @@ namespace if (0 != pipe2(fds_rw, O_CLOEXEC)) DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE); #else - if (0 != pipe(fds)) + if (0 != pipe(fds_rw)) DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE); if (0 != fcntl(fds_rw[0], F_SETFD, FD_CLOEXEC)) DB::throwFromErrno("Cannot create pipe", DB::ErrorCodes::CANNOT_PIPE); diff --git a/release b/release index 23bfd6f2dd6..e3c8842a820 100755 --- a/release +++ b/release @@ -32,7 +32,7 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) cd $CURDIR -source "./release_lib.sh" +source "./utils/release/release_lib.sh" PBUILDER_AUTOUPDATE=${PBUILDER_AUTOUPDATE=4320} diff --git a/release_lib.sh b/utils/release/release_lib.sh similarity index 99% rename from release_lib.sh rename to utils/release/release_lib.sh index ecdc10deefe..45a01e3f745 100644 --- a/release_lib.sh +++ b/utils/release/release_lib.sh @@ -9,7 +9,7 @@ function gen_version_string { } function get_version { - BASEDIR=$(dirname "${BASH_SOURCE[0]}") + BASEDIR=$(dirname "${BASH_SOURCE[0]}")/../../ VERSION_REVISION=`grep "set(VERSION_REVISION" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_REVISION \(.*\)$/\1/' | sed 's/[) ].*//'` VERSION_MAJOR=`grep "set(VERSION_MAJOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MAJOR \(.*\)/\1/' | sed 's/[) ].*//'` VERSION_MINOR=`grep "set(VERSION_MINOR" ${BASEDIR}/dbms/cmake/version.cmake | sed 's/^.*VERSION_MINOR \(.*\)/\1/' | sed 's/[) ].*//'` From 1eb0750cfa6049ff528ac5d1273899c7ac4e0dd5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 13:35:17 +0300 Subject: [PATCH 19/56] Get clang-7 from clang repo and rollback to ubuntu 18.04 --- docker/packager/binary/Dockerfile | 4 +++- docker/packager/deb/Dockerfile | 10 ++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index dfad9f9e123..b7ed6e68b7a 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -1,4 +1,6 @@ -FROM ubuntu:18.10 +FROM ubuntu:18.04 + +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 384ab76f625..98cda15a587 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -1,8 +1,10 @@ -FROM ubuntu:18.10 +FROM ubuntu:18.04 -RUN apt-get update -y \ +RUN echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main" >> /etc/apt/sources.list + +RUN apt-get --allow-unauthenticated update -y \ && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --yes --no-install-recommends \ + apt-get --allow-unauthenticated install --yes --no-install-recommends \ bash \ fakeroot \ cmake \ @@ -33,8 +35,8 @@ RUN 
apt-get update -y \ devscripts \ debhelper \ git \ - libc++abi-dev \ libc++-dev \ + libc++abi-dev \ libboost-program-options-dev \ libboost-system-dev \ libboost-filesystem-dev \ From 6401628dc85bd3a1f7aff3b840844c200e13d30d Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 14:25:59 +0300 Subject: [PATCH 20/56] Fix clang-7 werrors --- dbms/src/Common/config.h.in | 1 + dbms/src/Interpreters/ExpressionJIT.cpp | 30 ++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 624c87b91b5..09c2eadde29 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -16,6 +16,7 @@ #cmakedefine01 USE_BASE64 #cmakedefine01 USE_HDFS #cmakedefine01 USE_XXHASH +#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 LLVM_HAS_RTTI diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 123778c6fe9..9ac95e3a107 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -161,21 +161,21 @@ auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr) // Actually this should work for 7.0.0 but now we have OLDER 7.0.0svn in contrib auto flags = [&](const llvm::orc::SymbolNameSet & symbols) { - llvm::orc::SymbolFlagsMap flags; + llvm::orc::SymbolFlagsMap flags_map; for (const auto & symbol : symbols) { auto resolved = jsr.lookupFlags({*symbol}); if (resolved && resolved->size()) - flags.emplace(symbol, resolved->begin()->second); + flags_map.emplace(symbol, resolved->begin()->second); } - return flags; + return flags_map; }; #endif - auto symbols = [&](std::shared_ptr query, llvm::orc::SymbolNameSet symbols) + auto symbols = [&](std::shared_ptr query, llvm::orc::SymbolNameSet symbols_set) { llvm::orc::SymbolNameSet missing; - for (const auto & symbol : symbols) + for (const auto & symbol : symbols_set) { auto resolved = jsr.lookup({*symbol}); if (resolved && resolved->size()) @@ -275,20 +275,20 @@ struct LLVMContext { if (!module->size()) return 0; - llvm::PassManagerBuilder builder; + llvm::PassManagerBuilder pass_manager_builder; llvm::legacy::PassManager mpm; llvm::legacy::FunctionPassManager fpm(module.get()); - builder.OptLevel = 3; - builder.SLPVectorize = true; - builder.LoopVectorize = true; - builder.RerollLoops = true; - builder.VerifyInput = true; - builder.VerifyOutput = true; - machine->adjustPassManager(builder); + pass_manager_builder.OptLevel = 3; + pass_manager_builder.SLPVectorize = true; + pass_manager_builder.LoopVectorize = true; + pass_manager_builder.RerollLoops = true; + pass_manager_builder.VerifyInput = true; + pass_manager_builder.VerifyOutput = true; + machine->adjustPassManager(pass_manager_builder); fpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); mpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis())); - builder.populateFunctionPassManager(fpm); - builder.populateModulePassManager(mpm); + pass_manager_builder.populateFunctionPassManager(fpm); + pass_manager_builder.populateModulePassManager(mpm); fpm.doInitialization(); for (auto & function : *module) fpm.run(function); From 8ae59da55758fa2e088557bcad2292e5efd168d9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 14:46:20 +0300 Subject: [PATCH 21/56] Fix name hiding --- dbms/src/Functions/IFunction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Functions/IFunction.cpp 
b/dbms/src/Functions/IFunction.cpp index 6b6186302f7..ac5d1122e4a 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -512,8 +512,8 @@ static std::optional removeNullables(const DataTypes & types) if (!typeid_cast(type.get())) continue; DataTypes filtered; - for (const auto & type : types) - filtered.emplace_back(removeNullable(type)); + for (const auto & sub_type : types) + filtered.emplace_back(removeNullable(sub_type)); return filtered; } return {}; From b4a310b22eb6d6a8e1e749c79af185ae00eddb57 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 10 Jan 2019 16:22:37 +0300 Subject: [PATCH 22/56] Investigation of performance degradation of jemalloc --- libs/libcommon/src/tests/CMakeLists.txt | 2 ++ libs/libcommon/src/tests/allocator.cpp | 44 +++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 libs/libcommon/src/tests/allocator.cpp diff --git a/libs/libcommon/src/tests/CMakeLists.txt b/libs/libcommon/src/tests/CMakeLists.txt index 86f15cd7a53..2d46cb7146d 100644 --- a/libs/libcommon/src/tests/CMakeLists.txt +++ b/libs/libcommon/src/tests/CMakeLists.txt @@ -7,6 +7,7 @@ add_executable (date_lut4 date_lut4.cpp) add_executable (date_lut_default_timezone date_lut_default_timezone.cpp) add_executable (multi_version multi_version.cpp) add_executable (local_date_time_comparison local_date_time_comparison.cpp) +add_executable (realloc-perf allocator.cpp) set(PLATFORM_LIBS ${CMAKE_DL_LIBS}) @@ -17,6 +18,7 @@ target_link_libraries (date_lut4 common ${PLATFORM_LIBS}) target_link_libraries (date_lut_default_timezone common ${PLATFORM_LIBS}) target_link_libraries (multi_version common) target_link_libraries (local_date_time_comparison common) +target_link_libraries (realloc-perf common) add_check(multi_version) add_check(local_date_time_comparison) diff --git a/libs/libcommon/src/tests/allocator.cpp b/libs/libcommon/src/tests/allocator.cpp new file mode 100644 index 00000000000..8664d4cb073 --- /dev/null +++ b/libs/libcommon/src/tests/allocator.cpp @@ -0,0 +1,44 @@ +#include +#include +#include +#include + + +void thread_func() +{ + for (size_t i = 0; i < 100000; ++i) + { + size_t size = 4096; + + void * buf = malloc(size); + if (!buf) + abort(); + memset(buf, 0, size); + + while (size < 1048576) + { + size_t next_size = size * 4; + + void * new_buf = realloc(buf, next_size); + if (!new_buf) + abort(); + buf = new_buf; + + memset(buf + size, 0, next_size - size); + size = next_size; + } + + free(buf); + } +} + + +int main(int, char **) +{ + std::vector threads(16); + for (auto & thread : threads) + thread = std::thread(thread_func); + for (auto & thread : threads) + thread.join(); + return 0; +} From 7fc85d1ff089a882a2c764cd4da3f593f1f9ccda Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 10 Jan 2019 16:26:57 +0300 Subject: [PATCH 23/56] Investigation of performance degradation of jemalloc --- libs/libcommon/src/tests/allocator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libcommon/src/tests/allocator.cpp b/libs/libcommon/src/tests/allocator.cpp index 8664d4cb073..a6eff63b20d 100644 --- a/libs/libcommon/src/tests/allocator.cpp +++ b/libs/libcommon/src/tests/allocator.cpp @@ -24,7 +24,7 @@ void thread_func() abort(); buf = new_buf; - memset(buf + size, 0, next_size - size); + memset(reinterpret_cast(buf) + size, 0, next_size - size); size = next_size; } From 6cfa68d9dae5830becf52833effae8e3f573ce02 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 10 Jan 2019 18:01:22 +0300 Subject: 
[PATCH 24/56] Investigation of performance degradation of jemalloc --- libs/libcommon/src/tests/allocator.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/libs/libcommon/src/tests/allocator.cpp b/libs/libcommon/src/tests/allocator.cpp index a6eff63b20d..03f6228e0f5 100644 --- a/libs/libcommon/src/tests/allocator.cpp +++ b/libs/libcommon/src/tests/allocator.cpp @@ -6,7 +6,7 @@ void thread_func() { - for (size_t i = 0; i < 100000; ++i) + for (size_t i = 0; i < 100; ++i) { size_t size = 4096; @@ -36,9 +36,12 @@ void thread_func() int main(int, char **) { std::vector threads(16); - for (auto & thread : threads) - thread = std::thread(thread_func); - for (auto & thread : threads) - thread.join(); + for (size_t i = 0; i < 1000; ++i) + { + for (auto & thread : threads) + thread = std::thread(thread_func); + for (auto & thread : threads) + thread.join(); + } return 0; } From 275d6b2fcc0598c74549f27fc62d673323cefa18 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 10 Jan 2019 21:58:55 +0300 Subject: [PATCH 25/56] some QueryNormalizer refactoring --- .../Interpreters/DatabaseAndTableWithAlias.h | 1 + .../PredicateExpressionsOptimizer.cpp | 2 +- dbms/src/Interpreters/QueryNormalizer.cpp | 62 ++++++++++++------- dbms/src/Interpreters/QueryNormalizer.h | 14 ++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 31 ++++++---- 5 files changed, 65 insertions(+), 45 deletions(-) diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index ad7c1f2f8d4..8076deb5ee9 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -23,6 +23,7 @@ struct DatabaseAndTableWithAlias String table; String alias; + DatabaseAndTableWithAlias() = default; DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 408b827adae..ea50ac1d6f4 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -321,7 +321,7 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); - QueryNormalizer(ast, aliases, settings, {}, {}).perform(); + QueryNormalizer(ast, aliases, settings).perform(); for (const auto & projection_column : select_query->select_expression_list->children) { diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index eff50bfb235..37d7b9221f6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -21,13 +21,10 @@ namespace ErrorCodes } -QueryNormalizer::QueryNormalizer(ASTPtr & query, const QueryNormalizer::Aliases & aliases, - ExtractedSettings && settings_, const Names & all_column_names, - const TableNamesAndColumnNames & table_names_and_column_names) - : query(query), aliases(aliases), settings(settings_), all_column_names(all_column_names), - table_names_and_column_names(table_names_and_column_names) -{ -} +QueryNormalizer::QueryNormalizer(ASTPtr & query_, const QueryNormalizer::Aliases & aliases_, ExtractedSettings && settings_, + 
std::vector && tables_with_columns_) + : query(query_), aliases(aliases_), settings(settings_), tables_with_columns(tables_with_columns_) +{} void QueryNormalizer::perform() { @@ -138,23 +135,42 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf else if (ASTExpressionList * expr_list = typeid_cast(ast.get())) { /// Replace *, alias.*, database.table.* with a list of columns. - ASTs & asts = expr_list->children; - for (ssize_t expr_idx = asts.size() - 1; expr_idx >= 0; --expr_idx) - { - if (typeid_cast(asts[expr_idx].get()) && !all_column_names.empty()) - { - asts.erase(asts.begin() + expr_idx); - for (size_t column_idx = 0; column_idx < all_column_names.size(); ++column_idx) - asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared(all_column_names[column_idx])); - } - else if (typeid_cast(asts[expr_idx].get()) && !table_names_and_column_names.empty()) + ASTs old_children; + if (processAsterisks()) + { + bool has_asterisk = false; + for (const auto & child : expr_list->children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + { + has_asterisk = true; + break; + } + } + + if (has_asterisk) + { + old_children.swap(expr_list->children); + expr_list->children.reserve(old_children.size()); + } + } + + for (const auto & child : old_children) + { + if (typeid_cast(child.get())) + { + for (const auto & [table_name, table_columns] : tables_with_columns) + for (const auto & column_name : table_columns) + expr_list->children.emplace_back(std::make_shared(column_name)); + } + else if (const auto * qualified_asterisk = typeid_cast(child.get())) { - const ASTQualifiedAsterisk * qualified_asterisk = static_cast(asts[expr_idx].get()); const ASTIdentifier * identifier = typeid_cast(qualified_asterisk->children[0].get()); size_t num_components = identifier->children.size(); - for (const auto & [table_name, table_all_column_names] : table_names_and_column_names) + for (const auto & [table_name, table_columns] : tables_with_columns) { if ((num_components == 2 /// database.table.* && !table_name.database.empty() /// This is normal (not a temporary) table. @@ -164,14 +180,14 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf && ((!table_name.table.empty() && identifier->name == table_name.table) /// table.* || (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.* { - asts.erase(asts.begin() + expr_idx); - for (size_t column_idx = 0; column_idx < table_all_column_names.size(); ++column_idx) - asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared(table_all_column_names[column_idx])); - + for (const auto & column_name : table_columns) + expr_list->children.emplace_back(std::make_shared(column_name)); break; } } } + else + expr_list->children.emplace_back(child); } } else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast(ast.get())) diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 376b3ba6e07..a63fde8bb73 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -18,10 +18,6 @@ inline bool functionIsInOrGlobalInOperator(const String & name) } -using TableNameAndColumnNames = std::pair; -using TableNamesAndColumnNames = std::vector; - - class QueryNormalizer { /// Extracts settings, mostly to show which are used and which are not. 
@@ -41,9 +37,10 @@ class QueryNormalizer public: using Aliases = std::unordered_map; + using TableWithColumnNames = std::pair; - QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, const Names & all_columns_name, - const TableNamesAndColumnNames & table_names_and_column_names); + QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, + std::vector && tables_with_columns = {}); void perform(); @@ -54,8 +51,9 @@ private: ASTPtr & query; const Aliases & aliases; const ExtractedSettings settings; - const Names & all_column_names; - const TableNamesAndColumnNames & table_names_and_column_names; + const std::vector tables_with_columns; + + bool processAsterisks() const { return !tables_with_columns.empty(); } void performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level); }; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 364cf221f35..0dc9bd3670e 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -42,9 +42,9 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; } -void removeDuplicateColumns(NamesAndTypesList & columns) +NameSet removeDuplicateColumns(NamesAndTypesList & columns) { - std::set names; + NameSet names; for (auto it = columns.begin(); it != columns.end();) { if (names.emplace(it->name).second) @@ -52,6 +52,7 @@ void removeDuplicateColumns(NamesAndTypesList & columns) else columns.erase(it++); } + return names; } namespace @@ -77,8 +78,6 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); } } - - removeDuplicateColumns(source_columns); } /// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. @@ -102,12 +101,11 @@ void normalizeTree( SyntaxAnalyzerResult & result, const Names & source_columns, const NameSet & source_columns_set, - const StoragePtr & storage, const Context & context, const ASTSelectQuery * select_query, bool asterisk_left_columns_only) { - Names all_columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns; + Names all_columns_name = source_columns; if (!asterisk_left_columns_only) { @@ -119,17 +117,20 @@ void normalizeTree( if (all_columns_name.empty()) throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); - TableNamesAndColumnNames table_names_and_column_names; + std::vector table_with_columns; if (select_query && select_query->tables && !select_query->tables->children.empty()) { std::vector tables_expression = getSelectTablesExpression(*select_query); bool first = true; + String current_database = context.getCurrentDatabase(); for (const auto * table_expression : tables_expression) { - DatabaseAndTableWithAlias table_name(*table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias table_name(*table_expression, current_database); NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); + removeDuplicateColumns(names_and_types); + if (!first) { /// For joined tables qualify duplicating names. 
@@ -140,12 +141,13 @@ void normalizeTree( first = false; - table_names_and_column_names.emplace_back(std::pair(table_name, names_and_types.getNames())); + table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); } } + else + table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - auto & settings = context.getSettingsRef(); - QueryNormalizer(query, result.aliases, settings, all_columns_name, table_names_and_column_names).perform(); + QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(table_with_columns)).perform(); } bool hasArrayJoin(const ASTPtr & ast) @@ -739,6 +741,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( result.source_columns = source_columns_; collectSourceColumns(select_query, result.storage, result.source_columns); + NameSet source_columns_set = removeDuplicateColumns(result.source_columns); const auto & settings = context.getSettingsRef(); @@ -746,7 +749,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( source_columns_list.reserve(result.source_columns.size()); for (const auto & type_name : result.source_columns) source_columns_list.emplace_back(type_name.name); - NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); + + if (source_columns_set.size() != source_columns_list.size()) + throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR); if (select_query) { @@ -768,7 +773,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( } /// Common subexpression elimination. Rewrite rules. - normalizeTree(query, result, source_columns_list, source_columns_set, result.storage, + normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set, context, select_query, settings.asterisk_left_columns_only != 0); /// Remove unneeded columns according to 'required_result_columns'. 
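A note on the QueryNormalizer change above: the asterisk expansion it reorganizes can be reduced to a short standalone sketch. The snippet below is illustrative only — plain standard-library stand-ins instead of the real ClickHouse AST classes, and invented names (expandAsterisks, TablesWithColumns) that do not exist in the code base:

/// Hypothetical, simplified model of the "*" / "t.*" expansion that
/// QueryNormalizer performs. The real code walks ASTs, not strings.
#include <string>
#include <utility>
#include <vector>

using Columns = std::vector<std::string>;
using TablesWithColumns = std::vector<std::pair<std::string /* table name or alias */, Columns>>;

Columns expandAsterisks(const Columns & select_list, const TablesWithColumns & tables)
{
    Columns result;
    for (const auto & item : select_list)
    {
        if (item == "*")
        {
            /// Bare asterisk: every column of every table, in table order.
            for (const auto & [name, columns] : tables)
                result.insert(result.end(), columns.begin(), columns.end());
        }
        else if (item.size() > 2 && item.compare(item.size() - 2, 2, ".*") == 0)
        {
            /// Qualified asterisk: columns of the named table (or alias) only.
            const std::string qualifier = item.substr(0, item.size() - 2);
            for (const auto & [name, columns] : tables)
                if (name == qualifier)
                {
                    result.insert(result.end(), columns.begin(), columns.end());
                    break;
                }
        }
        else
        {
            /// Ordinary column reference: kept as is.
            result.push_back(item);
        }
    }
    return result;
}

With tables {"t1", {"a", "b"}} and {"t2", {"c"}}, expandAsterisks({"*"}, tables) yields {"a", "b", "c"}, mirroring the per-table loops in the hunks above.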
From 59f4f7f1407be907f4e985c2ee3507b4fba35f1f Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 10 Jan 2019 23:20:16 +0300 Subject: [PATCH 26/56] Remove llvm-7 from building images --- docker/packager/binary/Dockerfile | 3 --- docker/packager/deb/Dockerfile | 3 --- 2 files changed, 6 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index b7ed6e68b7a..8927e79b01f 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -23,9 +23,6 @@ RUN apt-get update -y \ lld-7 \ libclang-7-dev \ liblld-7-dev \ - llvm-7 \ - libllvm7 \ - llvm-7-dev \ libicu-dev \ libreadline-dev \ ninja-build \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 98cda15a587..3d7ad768ec5 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -24,9 +24,6 @@ RUN apt-get --allow-unauthenticated update -y \ lld-7 \ libclang-7-dev \ liblld-7-dev \ - llvm-7 \ - libllvm7 \ - llvm-7-dev \ libicu-dev \ libreadline-dev \ ninja-build \ From 0084785898118fcf288fdffc562924d394e5e1e0 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 00:05:01 +0300 Subject: [PATCH 27/56] fix build --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 --- dbms/src/Interpreters/SyntaxAnalyzer.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 4e89cabb9d5..52a6c8a5e17 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -78,9 +78,6 @@ namespace ErrorCodes extern const int EXPECTED_ALL_OR_ANY; } -/// From SyntaxAnalyzer.cpp -extern void removeDuplicateColumns(NamesAndTypesList & columns); - ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & query_, const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 5500823b3c2..54ca4dfcf2b 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -8,6 +8,8 @@ namespace DB class IStorage; using StoragePtr = std::shared_ptr; +NameSet removeDuplicateColumns(NamesAndTypesList & columns); + struct SyntaxAnalyzerResult { StoragePtr storage; From 861c225c1e95a966e21538816824a033821978c8 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 01:04:37 +0300 Subject: [PATCH 28/56] one another build fix --- dbms/src/Interpreters/QueryNormalizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 37d7b9221f6..c2e614753fb 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -161,8 +161,8 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf { if (typeid_cast(child.get())) { - for (const auto & [table_name, table_columns] : tables_with_columns) - for (const auto & column_name : table_columns) + for (const auto & pr : tables_with_columns) + for (const auto & column_name : pr.second) expr_list->children.emplace_back(std::make_shared(column_name)); } else if (const auto * qualified_asterisk = typeid_cast(child.get())) From 708a7744f6d723c80c8c38a0cfed362225796421 Mon Sep 17 00:00:00 2001 From: Nikolay Kirsh Date: Fri, 11 Jan 2019 09:58:16 +0300 Subject: [PATCH 29/56] fix page.title on 'hidden' pages (#4033) * fix page.title on 'hidden' pages * Update base.html * Update 
base.html * Update base.html --- docs/tools/mkdocs-material-theme/base.html | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/tools/mkdocs-material-theme/base.html b/docs/tools/mkdocs-material-theme/base.html index a8950c53c0a..05708186299 100644 --- a/docs/tools/mkdocs-material-theme/base.html +++ b/docs/tools/mkdocs-material-theme/base.html @@ -40,8 +40,10 @@ {% block htmltitle %} {% if page and page.meta and page.meta.title %} {{ page.meta.title }} - {% elif page and page.title and not page.is_homepage %} + {% elif page and page.title and not page.is_homepage and page.title != 'hidden' %} {{ page.title }} - {{ config.site_name }} + {% elif page and page.title and not page.is_homepage and page.title == 'hidden' and page.ancestors %} + {{ (page.ancestors | first).title }} - {{ config.site_name }} {% else %} {{ config.site_name }} {% endif %} From df5e56cb280a138703774d737479cf030b10deeb Mon Sep 17 00:00:00 2001 From: sundyli <543950155@qq.com> Date: Fri, 11 Jan 2019 15:59:16 +0800 Subject: [PATCH 30/56] Add zh/tests doc translate (#4034) * add development tests translate * add development toc --- docs/toc_zh.yml | 10 +- docs/zh/development/tests.md | 258 ++++++++++++++++++++++++++++++++++- 2 files changed, 262 insertions(+), 6 deletions(-) mode change 120000 => 100644 docs/zh/development/tests.md diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 01eda540d27..8faba962c4c 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -178,11 +178,11 @@ nav: - '开发者指南': - 'hidden': 'development/index.md' - - 'Overview of ClickHouse architecture': 'development/architecture.md' - - 'How to build ClickHouse on Linux': 'development/build.md' - - 'How to build ClickHouse on Mac OS X': 'development/build_osx.md' - - 'How to write C++ code': 'development/style.md' - - 'How to run ClickHouse tests': 'development/tests.md' + - 'ClickHouse架构概述': 'development/architecture.md' + - '如何在Linux中编译ClickHouse': 'development/build.md' + - '如何在Mac OS X中编译ClickHouse: 'development/build_osx.md' + - '如何编写C++代码': 'development/style.md' + - '如何运行ClickHouse测试': 'development/tests.md' - '新功能特性': - '路线图': 'roadmap.md' diff --git a/docs/zh/development/tests.md b/docs/zh/development/tests.md deleted file mode 120000 index c03d36c3916..00000000000 --- a/docs/zh/development/tests.md +++ /dev/null @@ -1 +0,0 @@ -../../en/development/tests.md \ No newline at end of file diff --git a/docs/zh/development/tests.md b/docs/zh/development/tests.md new file mode 100644 index 00000000000..2b5fb7ca0e6 --- /dev/null +++ b/docs/zh/development/tests.md @@ -0,0 +1,257 @@ +# ClickHouse 测试 + + +## 功能性测试 + +功能性测试是最简便使用的。绝大部分 ClickHouse 的功能可以通过功能性测试来测试,任何代码的更改都必须通过该测试。 + +每个功能测试会向正在运行的 ClickHouse服 务器发送一个或多个查询,并将结果与预期结果进行比较。 + +测试用例在 `dbms/src/tests/queries` 目录中。这里有两个子目录:`stateless` 和 `stateful`目录。 无状态的测试无需预加载测试数据集 - 通常是在测试运行期间动态创建小量的数据集。有状态测试需要来自 Yandex.Metrica 的预加载测试数据,而不向一般公众提供。 我们倾向于仅使用“无状态”测试并避免添加新的“有状态”测试。 + +每个测试用例可以是两种类型之一:`.sql` 和 `.sh`。`.sql` 测试文件是用于管理`clickhouse-client --multiquery --testmode`的简单SQL脚本。`.sh` 测试文件是一个可以自己运行的脚本。 + +要运行所有测试,请使用 `dbms/tests/clickhouse-test` 工具,用 `--help` 可以获取所有的选项列表。您可以简单地运行所有测试或运行测试名称中的子字符串过滤的测试子集:`./clickhouse-test substring`。 + +调用功能测试最简单的方法是将 `clickhouse-client` 复制到`/usr/bin/`,运行`clickhouse-server`,然后从自己的目录运行`./ clickhouse-test`。 + +要添加新测试,请在 `dbms/src/tests/queries/0_stateless` 目录内添加新的 `.sql` 或 `.sh` 文件,手动检查,然后按以下方式生成 `.reference` 文件: `clickhouse-client -n --testmode < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`。 + +测试应该只使用(创建,删除等)`test` 
数据库中的表,这些表假定是事先创建的; 测试也可以使用临时表。 + +如果要在功能测试中使用分布式查询,可以利用 `remote` 表函数和 `127.0.0.{1..2}` 地址为服务器查询自身; 或者您可以在服务器配置文件中使用预定义的测试集群,例如`test_shard_localhost`。 + +有些测试在名称中标有 `zookeeper`,`shard` 或 `long`。`zookeeper` 用于使用ZooKeeper的测试; `shard` 用于需要服务器监听`127.0.0.*`的测试。`long` 适用于运行时间稍长一秒的测试。 + + +## 已知的bug + +如果我们知道一些可以通过功能测试轻松复制的错误,我们将准备好的功能测试放在 `dbms/src/tests/queries/bugs` 目录中。当修复错误时,这些测试将被移动到 `dbms/src/tests/queries/0_stateless` 目录中。 + + +## 集成测试 + +集成测试允许在集群配置中测试 ClickHouse,并与其他服务器(如MySQL,Postgres,MongoDB)进行 ClickHouse 交互。它们可用于模拟网络拆分,数据包丢弃等。这些测试在Docker 下运行,并使用各种软件创建多个容器。 + +参考 `dbms/tests/integration/README.md` 文档关于如何使用集成测试。 + +请注意,ClickHouse 与第三方驱动程序的集成未经过测试。此外,我们目前还没有与 JDBC 和ODBC 驱动程序进行集成测试。 + + +## 单元测试 + +当您想要测试整个 ClickHouse,而不是单个独立的库或类时,单元测试非常有用。您可以使用`ENABLE_TESTS` CMake 选项启用或禁用测试构建。单元测试(和其他测试程序)位于代码中的`tests` 子目录中。要运行单元测试,请键入 `ninja test`。有些测试使用 `gtest`,但有些只是在测试失败时返回非零状态码。 + +如果代码已经被功能测试覆盖(并且功能测试通常使用起来要简单得多),则不一定要进行单元测试。 + + +## 性能测试 + +性能测试允许测量和比较综合查询中 ClickHouse 的某些独立部分的性能。测试位于`dbms/tests/performance` 目录中。每个测试都由 `.xml` 文件表示,并附有测试用例的描述。使用 `clickhouse performance-test` 工具(嵌入在 `clickhouse` 二进制文件中)运行测试。请参阅 `--help` 以进行调用。 + +每个测试在循环中运行一个或多个查询(可能带有参数组合),并具有一些停止条件(如“最大执行速度不会在三秒内更改”)并测量一些有关查询性能的指标(如“最大执行速度”))。某些测试可以包含预加载的测试数据集的前提条件。 + +如果要在某些情况下提高 ClickHouse 的性能,并且如果可以在简单查询上观察到改进,则强烈建议编写性能测试。在测试过程中使用 `perf top` 或其他 perf 工具总是有意义的。 + + +性能测试不是基于每个提交运行的。不收集性能测试结果,我们手动比较它们。 + + +## 测试工具和脚本 + +`tests`目录中的一些程序不是准备测试,而是测试工具。例如,对于`Lexer`,有一个工具`dbms/src/Parsers/tests/lexer` 标准输出。您可以使用这些工具作为代码示例以及探索和手动测试。 + +您还可以将一对文件 `.sh` 和 `.reference` 与工具放在一些预定义的输入上运行它 - 然后可以将脚本结果与 `.reference` 文件进行比较。这些测试不是自动化的。 + + +## 杂项测试 + +有一些外部字典的测试位于 `dbms/tests/external_dictionaries`,机器学习模型在`dbms/tests/external_models`目录。这些测试未更新,必须转移到集成测试。 + +对于分布式数据的插入,有单独的测试。此测试在单独的服务器上运行 ClickHouse 集群并模拟各种故障情况:网络拆分,数据包丢弃(ClickHouse 节点之间,ClickHouse 和 ZooKeeper之间,ClickHouse 服务器和客户端之间等),进行 `kill -9`,`kill -STOP` 和`kill -CONT` 等操作,类似[Jepsen](https://aphyr.com/tags/Jepsen)。然后,测试检查是否已写入所有已确认的插入,并且所有已拒绝的插入都未写入。 + + +在 ClickHouse 开源之前,分布式测试是由单独的团队编写的,但该团队不再使用 ClickHouse,测试是在 Java 中意外编写的。由于这些原因,必须重写分布式测试并将其移至集成测试。 + + +## 手动测试 + +当您开发了新的功能,做手动测试也是合理的。可以按照以下步骤来进行: + +编译 ClickHouse。在命令行中运行 ClickHouse:进入 `dbms/src/programs/clickhouse-server` 目录并运行 `./clickhouse-server`。它会默认使用当前目录的配置文件 (`config.xml`, `users.xml` 以及在 `config.d` 和 `users.d` 目录的文件)。可以使用 `dbms/src/programs/clickhouse-client/clickhouse-client` 来连接数据库。 + +或者,您可以安装 ClickHouse 软件包:从 Yandex 存储库中获得稳定版本,或者您可以在ClickHouse源根目录中使用 `./release` 构建自己的软件包。然后使用 `sudo service clickhouse-server start` 启动服务器(或停止服务器)。在 `/etc/clickhouse-server/clickhouse-server.log` 中查找日志。 + +当您的系统上已经安装了 ClickHouse 时,您可以构建一个新的 `clickhouse` 二进制文件并替换现有的二进制文件: + +``` +sudo service clickhouse-server stop +sudo cp ./clickhouse /usr/bin/ +sudo service clickhouse-server start +``` + +您也可以停止 clickhouse-server 并使用相同的配置运行您自己的服务器,日志打印到终端: +``` +sudo service clickhouse-server stop +sudo -u clickhouse /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml +``` + +使用 gdb 的一个示例: +``` +sudo -u clickhouse gdb --args /usr/bin/clickhouse server --config-file /etc/clickhouse-server/config.xml +``` + +如果 clickhouse-server 已经运行并且您不想停止它,您可以更改 `config.xml` 中的端口号(或在 `config.d` 目录中的文件中覆盖它们),配置适当的数据路径,然后运行它。 + +`clickhouse` 二进制文件几乎没有依赖关系,适用于各种 Linux 发行版。要快速地测试服务器上的更改,您可以简单地将新建的 `clickhouse` 二进制文件 `scp` 到其他服务器,然后按照上面的示例运行它。 + + +## 测试环境 + +在将版本发布为稳定之前,我们将其部署在测试环境中 测试环境是一个处理[Yandex.Metrica](https://metrica.yandex.com/)总数据的1/39部分大小的集群。 我们与 Yandex.Metrica 团队公用我们的测试环境。ClickHouse 在现有数据的基础上无需停机即可升级。 我们首先看到数据处理成功而不会实时滞后,复制继续工作,并且 Yandex.Metrica 
团队无法看到问题。 首先的检查可以通过以下方式完成: + +``` +SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h; +``` + +在某些情况下,我们还部署到 Yandex 的合作团队的测试环境:市场,云等。此外,我们还有一些用于开发目的的硬件服务器。 + +## 负载测试 + +部署到测试环境后,我们使用生产群集中的查询运行负载测试。 这是手动完成的。 + +确保在生产集群中开启了 `query_log` 选项。 + +收集一天或更多的查询日志: +``` +clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv +``` + +这是一个复杂的例子。`type = 2` 将过滤成功执行的查询。`query LIKE'%ym:%'` 用于从 Yandex.Metrica 中选择相关查询。`is_initial_query` 是仅选择由客户端发起的查询,而不是由 ClickHouse 本身(作为分布式查询处理的一部分)。 + +`scp` 这份日志到测试机器,并运行以下操作: + +``` +clickhouse benchmark --concurrency 16 < queries.tsv +``` +(可能你需要指定运行的用户 `--user`) + +然后离开它一晚或周末休息一下。 + +你要检查下 `clickhouse-server` 是否崩溃,内存占用是否合理,性能也不会随着时间的推移而降低。 + +由于查询和环境的高度可变性,不会记录精确的查询执行时序并且不进行比较。 + + +## 编译测试 + +构建测试允许检查构建在各种替代配置和某些外部系统上是否被破坏。测试位于`ci`目录。 它们从 Docker,Vagrant 中的源代码运行构建,有时在 Docker 中运行 `qemu-user-static`。这些测试正在开发中,测试运行不是自动化的。 + +动机: + +通常我们会在 ClickHouse 构建的单个版本上发布并运行所有测试。 但是有一些未经过彻底测试的替代构建版本。 例子: + +- 在 FreeBSD 中的构建; +- 在 Debian 中使用系统包中的库进行构建; +- 使用库的共享链接构建; +- 在 AArch64 平台进行构建。 + +例如,使用系统包构建是不好的做法,因为我们无法保证系统具有的确切版本的软件包。但 Debian 维护者确实需要这样做。出于这个原因,我们至少必须支持这种构建。另一个例子:共享链接是一个常见的麻烦来源,但是对于一些爱好者来说需要它。 + +虽然我们无法对所有构建版本运行所有测试,但我们想要检查至少不会破坏各种构建变体。为此,我们使用构建测试。 + + +## 测试协议兼容性 + +当我们扩展 ClickHouse 网络协议时,我们手动测试旧的 clickhouse-client 与新的 clickhouse-server 和新的clickhouse-client 一起使用旧的 clickhouse-server (只需从相应的包中运行二进制文件) + + +## 来自编译器的帮助 + +ClickHouse 主要的代码 (位于`dbms`目录中) 使用 `-Wall -Wextra -Werror` 构建,并带有一些其他已启用的警告。 虽然没有为第三方库启用这些选项。 + +Clang 有更多有用的警告 - 您可以使用 `-Weverything` 查找它们并选择默认构建的东西。 + +对于生产构建,使用 gcc(它仍然生成比 clang 稍高效的代码)。对于开发来说,clang 通常更方便使用。您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电量),但请注意,由于更好的控制流程和过程分析,编译器使用 `-O3` 会生成更多警告。 当使用 clang 构建时,使用 `libc++` 而不是 `libstdc++`,并且在使用调试模式构建时,使用调试版本的 `libc++`,它允许在运行时捕获更多错误。 + +## Sanitizers + +**Address sanitizer**. +我们在每个提交的基础上在 ASan 下运行功能和集成测试。 + +**Valgrind (Memcheck)**. +我们在 Valgrind 过夜进行功能测试。 这需要几个小时。 目前在 `re2` 库中有一个已知的误报,请参阅 [文章](https://research.swtch.com/sparse)。 + +**Thread sanitizer**. +我们在 TSan 下进行功能测试。ClickHouse 必须通过所有测试。在 TSan 下运行不是自动化的,只是偶尔执行。 + +**Memory sanitizer**. +目前我们不使用 MSan。 + +**Undefined behaviour sanitizer.** +我们仍然不会在每次提交的基础上使用 UBSan。 有一些地方需要解决。 + +**Debug allocator.** +您可以使用 `DEBUG_TCMALLOC` CMake 选项启用 `tcmalloc` 的调试版本。我们在每次提交的基础上使用调试分配器运行测试。 + +更多请参阅 `dbms/tests/instructions/sanitizers.txt`。 + + +## 模糊测试 + +我们使用简单的模糊测试来生成随机SQL查询并检查服务器是否正常,使用 Address sanitizer 执行模糊测试。你可以在`00746_sql_fuzzy.pl` 找到它。 测试应连续进行(过夜和更长时间)。 + +截至2018年12月,我们仍然不使用库代码的孤立模糊测试。 + +## 安全审计 + +Yandex Cloud 部门的人员从安全角度对 ClickHouse 功能进行了一些基本概述。 + + +## 静态分析 + +我们偶尔使用静态分析。我们已经评估过 `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`。您将在 `dbms/tests/instructions/` 目录中找到使用说明。你也可以阅读[俄文文章](https://habr.com/company/yandex/blog/342018/). 
+ +如果您使用 `CLion` 作为 IDE,您可以开箱即用一些 `clang-tidy` 检查。 + +## 其他强化 + +默认情况下使用 `FORTIFY_SOURCE`。它几乎没用,但在极少数情况下仍然有意义,我们不会禁用它。 + + +## 代码风格 + +代码风格在[这里](https://clickhouse.yandex/docs/en/development/style/) 有说明。 + +要检查一些常见的样式冲突,您可以使用 `utils/check-style` 脚本。 + +为了强制你的代码的正确风格,你可以使用 `clang-format` 文件。`.clang-format` 位于源代码根目录, 它主要与我们的实际代码风格对应。但不建议将 `clang-format` 应用于现有文件,因为它会使格式变得更糟。您可以使用 `clang-format-diff` 工具,您可以在 clang 源代码库中找到 + +或者,您可以尝试`uncrustify` 工具来格式化您的代码。配置文件在源代码的根目录中的`uncrustify.cfg`。它比 `clang-format` 经过更少的测试。 + +`CLion` 有自己的代码格式化程序,必须调整为我们的代码风格。 + + +## Metrica B2B 测试 + +每个 ClickHouse 版本都经过 Yandex Metrica 和 AppMetrica 引擎的测试。测试和稳定版本的 ClickHouse 部署在虚拟机上,并使用处理输入数据固定样本的度量引擎的小副本运行。 将度量引擎的两个实例的结果一起进行比较 + +这些测试是由单独的团队自动完成的。由于移动部件的数量很多,大部分时间的测试都是完全无关的,很难弄清楚。很可能这些测试对我们来说是负值。然而,这些测试被证明是有用的大约一个或两个倍的数百。 + + +## 测试覆盖率 + +截至2018年7月,我们不会跟踪测试复盖率。 + + +## 自动化测试 + +我们使用 Yandex 内部 CI 和名为"沙箱"的作业自动化系统运行测试。 我们还继续使用 Jenkins(可在Yandex内部使用)。 + +构建作业和测试在沙箱中按每次提交的基础上运行。结果包和测试结果发布在 GitHub 上,可以通过直接链接下载,结果会被永久存储。当您在 GitHub 上发送拉取请求时,我们将其标记为"可以测试",我们的 CI 系统将为您构建 ClickHouse 包(发布,调试,地址消除等)。 + +由于时间和计算能力的限制,我们不使用 Travis CI。 + +在 Jenkins,我们运行字典测试,指标B2B测试。 我们使用 Jenkins 来准备和发布版本。 Jenkins是一种传统的技术,所有的工作将被转移到沙箱中。 + +[来源文章](https://clickhouse.yandex/docs/zh/development/tests/) From 1c2140515f3cf9ea872ebd082faeb396724cec63 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 11 Jan 2019 11:24:40 +0300 Subject: [PATCH 31/56] fix mistype --- docs/toc_zh.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/toc_zh.yml b/docs/toc_zh.yml index 8faba962c4c..764195a3f04 100644 --- a/docs/toc_zh.yml +++ b/docs/toc_zh.yml @@ -180,7 +180,7 @@ nav: - 'hidden': 'development/index.md' - 'ClickHouse架构概述': 'development/architecture.md' - '如何在Linux中编译ClickHouse': 'development/build.md' - - '如何在Mac OS X中编译ClickHouse: 'development/build_osx.md' + - '如何在Mac OS X中编译ClickHouse': 'development/build_osx.md' - '如何编写C++代码': 'development/style.md' - '如何运行ClickHouse测试': 'development/tests.md' From baafb84018da8b499ffbe927e6b1dc35738d5c8d Mon Sep 17 00:00:00 2001 From: BayoNet Date: Fri, 11 Jan 2019 12:43:03 +0300 Subject: [PATCH 32/56] merge_tree settings (#4024) * Update of english version of descriprion of the table function `file`. * New syntax for ReplacingMergeTree. Some improvements in text. * Significantly change article about SummingMergeTree. Article is restructured, text is changed in many places of the document. New syntax for table creation is described. * Descriptions of AggregateFunction and AggregatingMergeTree are updated. Russian version. * New syntax for new syntax of CREATE TABLE * Added english docs on Aggregating, Replacing and SummingMergeTree. * CollapsingMergeTree docs. English version. * 1. Update of CollapsingMergeTree. 2. Minor changes in markup * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatefunction.md * Update aggregatingmergetree.md * GraphiteMergeTree docs update. New syntax for creation of Replicated* tables. Minor changes in *MergeTree tables creation syntax. * Markup fix * Markup and language fixes * Clarification in the CollapsingMergeTree article * DOCAPI-4821. Sync between ru and en versions of docs. * Fixed the ambiguity in geo functions description. * Example of JOIN in ru docs * Deleted misinforming example. * Fixed links to IN operators. * Updated the description of ALTER MODIFY. * [RU] Updated ALTER MODIFY description. * merge_tree_* settings description. * Fixed link anchor. 
--- .../en/operations/server_settings/settings.md | 4 +- docs/en/operations/settings/settings.md | 54 ++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 76fe4a62bb1..6cb2de77ba9 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -625,11 +625,11 @@ Path to temporary data for processing large queries. ``` -## uncompressed_cache_size +## uncompressed_cache_size {#server-settings-uncompressed_cache_size} Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../operations/table_engines/mergetree.md). -There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../settings/settings.md) is enabled. +There is one shared cache for the server. Memory is allocated on demand. The cache is used if the option [use_uncompressed_cache](../settings/settings.md#setting-use_uncompressed_cache) is enabled. The uncompressed cache is advantageous for very short queries in individual cases. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index add73212c5d..36a4d6bc135 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -110,6 +110,56 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block However, the block size cannot be more than `max_block_size` rows. Disabled by default (set to 0). It only works when reading from MergeTree engines. +## merge_tree_uniform_read_distribution {#setting-merge_tree_uniform_read_distribution} + +When reading from [MergeTree*](../table_engines/mergetree.md) tables, ClickHouse uses several threads. This setting turns on/off the uniform distribution of reading tasks over the working threads. The algorithm of the uniform distribution aims to make execution time for all the threads approximately equal in a `SELECT` query. + +**Possible values** + +- 0 — Uniform read distribution turned off. - 1 — Uniform read distribution turned on. + +**Default value** — 1. + +## merge_tree_min_rows_for_concurrent_read {#setting-merge_tree_min_rows_for_concurrent_read} + +If the number of rows to be read from a file of a [MergeTree*](../table_engines/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read`, then ClickHouse tries to perform concurrent reading from this file in several threads. + +**Possible values** + +Any positive integer. + +**Default value** — 163840. + +## merge_tree_min_rows_for_seek {#setting-merge_tree_min_rows_for_seek} + +If the distance between two data blocks to be read in one file is less than `merge_tree_min_rows_for_seek` rows, then ClickHouse does not seek through the file but reads the data sequentially. + +**Possible values** + +Any positive integer. + +**Default value** — 0. + +## merge_tree_coarse_index_granularity {#setting-merge_tree_coarse_index_granularity} + +When searching data, ClickHouse checks the data marks in the index file. If ClickHouse finds that the required keys are in some range, it divides this range into `merge_tree_coarse_index_granularity` subranges and searches the required keys there recursively. + +**Possible values** + +Any positive even integer. + +**Default value** — 8.
+ +## merge_tree_max_rows_to_use_cache {#setting-merge_tree_max_rows_to_use_cache} + +If ClickHouse needs to read more than `merge_tree_max_rows_to_use_cache` rows in one query, it does not use the cache of uncompressed blocks. The [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks. + +**Possible values** + +Any positive integer. + +**Default value** — 1048576. ## log_queries @@ -242,10 +292,10 @@ Whether to count extreme values (the minimums and maximums in columns of a query For more information, see the section "Extreme values". -## use_uncompressed_cache +## use_uncompressed_cache {#setting-use_uncompressed_cache} Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled). -The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the 'uncompressed_cache_size' configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. +The uncompressed cache (only for tables in the MergeTree family) allows significantly reducing latency and increasing throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../server_settings/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed; the least-used data is automatically deleted. For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically in order to save space for truly small queries. So you can keep the 'use_uncompressed_cache' setting always set to 1.
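An aside on the `merge_tree_coarse_index_granularity` description in the patch above: the recursive narrowing it refers to can be sketched in a few lines. This is a toy model under assumed interfaces (mark ranges as index pairs, a caller-supplied may_contain_key predicate), not the actual MergeTree reader:

/// Toy model of the coarse index search described above: split a candidate
/// mark range into `granularity` subranges and recurse into those that may
/// still contain the key. All names and types are invented for illustration.
#include <algorithm>
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

using MarkRange = std::pair<size_t, size_t>; /// [begin, end) in index marks

void coarseIndexSearch(
    size_t begin, size_t end, size_t granularity, size_t min_marks,
    const std::function<bool(size_t, size_t)> & may_contain_key,
    std::vector<MarkRange> & result)
{
    if (begin >= end || !may_contain_key(begin, end))
        return; /// The marks prove the key cannot be in this range.

    if (end - begin <= min_marks)
    {
        /// Range is small enough: schedule it for reading.
        result.emplace_back(begin, end);
        return;
    }

    const size_t step = (end - begin + granularity - 1) / granularity;
    for (size_t sub_begin = begin; sub_begin < end; sub_begin += step)
        coarseIndexSearch(sub_begin, std::min(sub_begin + step, end),
                          granularity, min_marks, may_contain_key, result);
}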
From 497b6ec2adc78daf4797916ba713a02fd7b4f4bd Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 11 Jan 2019 15:40:19 +0300 Subject: [PATCH 33/56] Better compile with low memory; Fix macos build, add utils/build/build_debian_unbundled_split.sh test script (#4030) --- CMakeLists.txt | 15 +++++++++------ cmake/find_ssl.cmake | 2 +- cmake/limit_jobs.cmake | 12 ++++++------ dbms/CMakeLists.txt | 16 ++++++++++------ dbms/programs/copier/ClusterCopier.cpp | 2 +- utils/CMakeLists.txt | 10 +++++----- utils/build/build_debian_unbundled.sh | 2 +- utils/build/build_debian_unbundled_split.sh | 6 ++++++ 8 files changed, 39 insertions(+), 26 deletions(-) create mode 100755 utils/build/build_debian_unbundled_split.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b77687c645..ef504c107bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,8 +90,6 @@ if (GLIBC_COMPATIBILITY) set (USE_INTERNAL_MEMCPY ON) endif () -set (COMPILER_FLAGS "${COMPILER_FLAGS}") - string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER}) find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld") @@ -108,10 +106,15 @@ if (LINKER_NAME) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") endif () -option (PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON) -if (PIPE) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -pipe") -endif () +cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd +if(NOT AVAILABLE_PHYSICAL_MEMORY OR AVAILABLE_PHYSICAL_MEMORY GREATER 8000) + option(COMPILER_PIPE "-pipe compiler option [less /tmp usage, more ram usage]" ON) +endif() +if(COMPILER_PIPE) + set(COMPILER_FLAGS "${COMPILER_FLAGS} -pipe") +else() + message(STATUS "Disabling compiler -pipe option (have only ${AVAILABLE_PHYSICAL_MEMORY} mb of memory)") +endif() include (cmake/test_cpu.cmake) diff --git a/cmake/find_ssl.cmake b/cmake/find_ssl.cmake index d159bcfba23..4af11e033fe 100644 --- a/cmake/find_ssl.cmake +++ b/cmake/find_ssl.cmake @@ -6,7 +6,7 @@ set (OPENSSL_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) if (NOT USE_INTERNAL_SSL_LIBRARY) if (APPLE) - set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl") + set (OPENSSL_ROOT_DIR "/usr/local/opt/openssl" CACHE INTERNAL "") # https://rt.openssl.org/Ticket/Display.html?user=guest&pass=guest&id=2232 if (USE_STATIC_LIBRARIES) message(WARNING "Disable USE_STATIC_LIBRARIES if you have linking problems with OpenSSL on MacOS") diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 1f05dad912b..4f305bfb4c3 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -4,6 +4,7 @@ # include (cmake/limit_jobs.cmake) cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd +cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES) option(PARALLEL_COMPILE_JOBS "Define the maximum number of concurrent compilation jobs" "") if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY) @@ -12,7 +13,7 @@ if (NOT PARALLEL_COMPILE_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_COMPILER_MEM set (PARALLEL_COMPILE_JOBS 1) endif () endif () -if (PARALLEL_COMPILE_JOBS) +if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) 
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) @@ -25,13 +26,12 @@ if (NOT PARALLEL_LINK_JOBS AND AVAILABLE_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY) set (PARALLEL_LINK_JOBS 1) endif () endif () -if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) - message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}") -endif () - -if (LLVM_PARALLEL_LINK_JOBS) +if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR}) string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK}) set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS}) endif () +if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) + message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}") +endif () diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 84c4b76d6fb..7e39fd2f7af 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -2,13 +2,17 @@ if (USE_INCLUDE_WHAT_YOU_USE) set (CMAKE_CXX_INCLUDE_WHAT_YOU_USE ${IWYU_PATH}) endif () -set (MAX_COMPILER_MEMORY 2500 CACHE INTERNAL "") -if (MAKE_STATIC_LIBRARIES) - set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") +if(COMPILER_PIPE) + set(MAX_COMPILER_MEMORY 2500) else() - set (MAX_LINKER_MEMORY 2500 CACHE INTERNAL "") -endif () -include (../cmake/limit_jobs.cmake) + set(MAX_COMPILER_MEMORY 1500) +endif() +if(MAKE_STATIC_LIBRARIES) + set(MAX_LINKER_MEMORY 3500) +else() + set(MAX_LINKER_MEMORY 2500) +endif() +include(../cmake/limit_jobs.cmake) include(cmake/find_vectorclass.cmake) diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 203a7f0cebb..4f285c83f17 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -1997,7 +1997,7 @@ protected: }; { - ThreadPool thread_pool(std::min(num_shards, UInt64(getNumberOfPhysicalCPUCores()))); + ThreadPool thread_pool(std::min(num_shards, getNumberOfPhysicalCPUCores())); for (UInt64 shard_index = 0; shard_index < num_shards; ++shard_index) thread_pool.schedule([=] { do_for_shard(shard_index); }); diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 08a60cdf19c..9b0f8a6a741 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -3,12 +3,12 @@ if (NOT NO_WERROR) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") endif () -if (MAKE_STATIC_LIBRARIES) - set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") +if(MAKE_STATIC_LIBRARIES) + set(MAX_LINKER_MEMORY 3500) else() - set (MAX_LINKER_MEMORY 2500 CACHE INTERNAL "") -endif () -include (../cmake/limit_jobs.cmake) + set(MAX_LINKER_MEMORY 2500) +endif() +include(../cmake/limit_jobs.cmake) # Utils used in package add_subdirectory (config-processor) diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index 2809e1f3cc1..aeab5eef6b9 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -22,5 +22,5 @@ env TEST_RUN=1 \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ `# gdb - symbol test in pbuilder` \ - EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev 
libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev $EXTRAPACKAGES" \ + EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libxml2-dev libgsasl7-dev $EXTRAPACKAGES" \ pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT diff --git a/utils/build/build_debian_unbundled_split.sh b/utils/build/build_debian_unbundled_split.sh new file mode 100755 index 00000000000..5242b0f4a6f --- /dev/null +++ b/utils/build/build_debian_unbundled_split.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +CMAKE_FLAGS+=" -DCLICKHOUSE_SPLIT_BINARY=1 " +. $CUR_DIR/build_debian_unbundled.sh From 8a0867e5601ebad3e5e0956f75081a902180fd55 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 11 Jan 2019 16:57:46 +0300 Subject: [PATCH 34/56] Removed useless and misleading messages from Poco cmake --- contrib/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ac7b287f886..9d964f288d8 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -139,6 +139,7 @@ if (USE_INTERNAL_CAPNP_LIBRARY) endif () if (USE_INTERNAL_POCO_LIBRARY) + set (POCO_VERBOSE_MESSAGES 0 CACHE INTERNAL "") set (save_CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) set (_save ${ENABLE_TESTS}) From c53854125f275a7867070a3154cf24732bf5ff15 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 17:09:23 +0300 Subject: [PATCH 35/56] QueryNormalizer with visitor interface --- .../PredicateExpressionsOptimizer.cpp | 5 +- dbms/src/Interpreters/QueryNormalizer.cpp | 100 ++++++++++++------ dbms/src/Interpreters/QueryNormalizer.h | 47 +++++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 3 +- 4 files changed, 107 insertions(+), 48 deletions(-) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index ea50ac1d6f4..d70ca0edd07 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -319,9 +319,12 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); + QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); - QueryNormalizer(ast, aliases, settings).perform(); + + QueryNormalizer::Data normalizer_data(aliases, settings); + QueryNormalizer(normalizer_data).visit(ast); for (const auto & projection_column : select_query->select_expression_list->children) { diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index c2e614753fb..ea6312f84e4 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -20,36 +20,56 @@ namespace ErrorCodes extern const int CYCLIC_ALIASES; } - -QueryNormalizer::QueryNormalizer(ASTPtr & query_, const 
QueryNormalizer::Aliases & aliases_, ExtractedSettings && settings_, - std::vector && tables_with_columns_) - : query(query_), aliases(aliases_), settings(settings_), tables_with_columns(tables_with_columns_) -{} - -void QueryNormalizer::perform() +class CheckASTDepth { - SetOfASTs tmp_set; - MapOfASTs tmp_map; - performImpl(query, tmp_map, tmp_set, "", 0); - - try +public: + CheckASTDepth(QueryNormalizer::Data & data_) + : data(data_) { - query->checkSize(settings.max_expanded_ast_elements); + if (data.level > data.settings.max_ast_depth) + throw Exception("Normalized AST is too deep. Maximum: " + toString(data.settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST); + ++data.level; } - catch (Exception & e) - { - e.addMessage("(after expansion of aliases)"); - throw; - } -} -/// finished_asts - already processed vertices (and by what they replaced) -/// current_asts - vertices in the current call stack of this method -/// current_alias - the alias referencing to the ancestor of ast (the deepest ancestor with aliases) -void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level) + ~CheckASTDepth() + { + --data.level; + } + +private: + QueryNormalizer::Data & data; +}; + + +class RestoreAliasOnExitScope { - if (level > settings.max_ast_depth) - throw Exception("Normalized AST is too deep. Maximum: " + toString(settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST); +public: + RestoreAliasOnExitScope(String & alias_) + : alias(alias_) + , copy(alias_) + {} + + ~RestoreAliasOnExitScope() + { + alias = copy; + } + +private: + String & alias; + const String copy; +}; + + +void QueryNormalizer::visit(ASTPtr & ast, Data & data) +{ + CheckASTDepth scope1(data); + RestoreAliasOnExitScope scope2(data.current_alias); + + auto & aliases = data.aliases; + auto & tables_with_columns = data.tables_with_columns; + auto & finished_asts = data.finished_asts; + auto & current_asts = data.current_asts; + String & current_alias = data.current_alias; if (finished_asts.count(ast)) { @@ -87,7 +107,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf /// will be sent to remote servers during distributed query execution, /// and on all remote servers, function implementation will be same. if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct") - func_node->name = settings.count_distinct_implementation; + func_node->name = data.settings.count_distinct_implementation; /// As special case, treat count(*) as count(), not as count(list of all columns). if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1 @@ -137,7 +157,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf /// Replace *, alias.*, database.table.* with a list of columns. ASTs old_children; - if (processAsterisks()) + if (data.processAsterisks()) { bool has_asterisk = false; for (const auto & child : expr_list->children) @@ -206,7 +226,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. 
if (replaced) { - performImpl(ast, finished_asts, current_asts, current_alias, level + 1); + visit(ast, data); current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; @@ -227,7 +247,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf if (typeid_cast(child.get()) || typeid_cast(child.get())) continue; - performImpl(child, finished_asts, current_asts, current_alias, level + 1); + visit(child, data); } } else if (identifier_node) @@ -240,7 +260,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf if (typeid_cast(child.get()) || typeid_cast(child.get())) continue; - performImpl(child, finished_asts, current_asts, current_alias, level + 1); + visit(child, data); } } @@ -248,16 +268,30 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf if (ASTSelectQuery * select = typeid_cast(ast.get())) { if (select->prewhere_expression) - performImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1); + visit(select->prewhere_expression, data); if (select->where_expression) - performImpl(select->where_expression, finished_asts, current_asts, current_alias, level + 1); + visit(select->where_expression, data); if (select->having_expression) - performImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1); + visit(select->having_expression, data); } current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; + + /// @note cannot place this in the CheckASTDepth destructor because checkSize() can throw. + if (data.level == 1) + { + try + { + ast->checkSize(data.settings.max_expanded_ast_elements); + } + catch (Exception & e) + { + e.addMessage("(after expansion of aliases)"); + throw; + } + } } } diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index a63fde8bb73..55d33931d2f 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -39,23 +39,44 @@ public: using Aliases = std::unordered_map; using TableWithColumnNames = std::pair; - QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, - std::vector && tables_with_columns = {}); + struct Data + { + using SetOfASTs = std::set; + using MapOfASTs = std::map; - void perform(); + const Aliases & aliases; + const ExtractedSettings settings; + const std::vector tables_with_columns; + + /// tmp data + size_t level; + MapOfASTs finished_asts; /// already processed vertices (and by what they replaced) + SetOfASTs current_asts; /// vertices in the current call stack of this method + std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases) + + Data(const Aliases & aliases_, ExtractedSettings && settings_, std::vector && tables_with_columns_ = {}) + : aliases(aliases_) + , settings(settings_) + , tables_with_columns(tables_with_columns_) + , level(0) + {} + + bool processAsterisks() const { return !tables_with_columns.empty(); } + }; + + QueryNormalizer(Data & data) + : visitor_data(data) + {} + + void visit(ASTPtr & ast) + { + visit(ast, visitor_data); + } private: - using SetOfASTs = std::set; - using MapOfASTs = std::map; + Data & visitor_data; - ASTPtr & query; - const Aliases & aliases; - const ExtractedSettings settings; - const std::vector tables_with_columns; - - bool processAsterisks() const { return !tables_with_columns.empty(); } - - void performImpl(ASTPtr & ast,
MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level); }; } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 0dc9bd3670e..5b40200c019 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -147,7 +147,8 @@ void normalizeTree( else table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(table_with_columns)).perform(); + QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef(), std::move(table_with_columns)); + QueryNormalizer(normalizer_data).visit(query); } bool hasArrayJoin(const ASTPtr & ast) From e4fce8291e8d335795a6d0c260cad1c75ff5dc52 Mon Sep 17 00:00:00 2001 From: Nikolay Kirsh Date: Fri, 11 Jan 2019 18:27:45 +0300 Subject: [PATCH 36/56] Update copyright (#4039) 2019 has started --- docs/tools/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tools/build.py b/docs/tools/build.py index e3c90f2b956..e7e1c777b42 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -99,7 +99,7 @@ def build_for_lang(lang, args): site_dir=os.path.join(args.output_dir, lang), strict=True, theme=theme_cfg, - copyright='©2016–2018 Yandex LLC', + copyright='©2016–2019 Yandex LLC', use_directory_urls=True, repo_name='yandex/ClickHouse', repo_url='https://github.com/yandex/ClickHouse/', From 7cefef698355b7df14dd4a8ef7e6b1a30f92ebeb Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 11 Jan 2019 19:52:46 +0300 Subject: [PATCH 37/56] Fix linking in all modes (unbundled, split, ...) (#4037) --- dbms/programs/client/CMakeLists.txt | 2 +- dbms/programs/odbc-bridge/CMakeLists.txt | 2 +- .../{TableFunctions => Common}/parseRemoteDescription.cpp | 2 +- .../{TableFunctions => Common}/parseRemoteDescription.h | 0 dbms/src/Dictionaries/CMakeLists.txt | 8 ++------ dbms/src/Functions/CMakeLists.txt | 8 +++++--- dbms/src/Storages/StorageHDFS.cpp | 2 +- dbms/src/TableFunctions/TableFunctionRemote.cpp | 6 +++--- 8 files changed, 14 insertions(+), 16 deletions(-) rename dbms/src/{TableFunctions => Common}/parseRemoteDescription.cpp (99%) rename dbms/src/{TableFunctions => Common}/parseRemoteDescription.h (100%) diff --git a/dbms/programs/client/CMakeLists.txt b/dbms/programs/client/CMakeLists.txt index f3dd518e891..c5c5cdc664f 100644 --- a/dbms/programs/client/CMakeLists.txt +++ b/dbms/programs/client/CMakeLists.txt @@ -1,5 +1,5 @@ add_library (clickhouse-client-lib ${LINK_MODE} Client.cpp) -target_link_libraries (clickhouse-client-lib PRIVATE clickhouse_common_io clickhouse_functions clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) if (READLINE_INCLUDE_DIR) target_include_directories (clickhouse-client-lib SYSTEM PRIVATE ${READLINE_INCLUDE_DIR}) endif () diff --git a/dbms/programs/odbc-bridge/CMakeLists.txt b/dbms/programs/odbc-bridge/CMakeLists.txt index a57c8c9c8cf..d57a41ebfc6 100644 --- a/dbms/programs/odbc-bridge/CMakeLists.txt +++ b/dbms/programs/odbc-bridge/CMakeLists.txt @@ -9,7 +9,7 @@ add_library (clickhouse-odbc-bridge-lib ${LINK_MODE} validateODBCConnectionString.cpp ) -target_link_libraries (clickhouse-odbc-bridge-lib
PRIVATE clickhouse_common_io daemon dbms) +target_link_libraries (clickhouse-odbc-bridge-lib PRIVATE clickhouse_dictionaries daemon dbms clickhouse_common_io) target_include_directories (clickhouse-odbc-bridge-lib PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) if (USE_POCO_SQLODBC) diff --git a/dbms/src/TableFunctions/parseRemoteDescription.cpp b/dbms/src/Common/parseRemoteDescription.cpp similarity index 99% rename from dbms/src/TableFunctions/parseRemoteDescription.cpp rename to dbms/src/Common/parseRemoteDescription.cpp index 1dce4c6f6e1..7c41ac6e38b 100644 --- a/dbms/src/TableFunctions/parseRemoteDescription.cpp +++ b/dbms/src/Common/parseRemoteDescription.cpp @@ -1,4 +1,4 @@ -#include +#include "parseRemoteDescription.h" #include #include diff --git a/dbms/src/TableFunctions/parseRemoteDescription.h b/dbms/src/Common/parseRemoteDescription.h similarity index 100% rename from dbms/src/TableFunctions/parseRemoteDescription.h rename to dbms/src/Common/parseRemoteDescription.h diff --git a/dbms/src/Dictionaries/CMakeLists.txt b/dbms/src/Dictionaries/CMakeLists.txt index 08624bd6c6a..d7f85a5c7eb 100644 --- a/dbms/src/Dictionaries/CMakeLists.txt +++ b/dbms/src/Dictionaries/CMakeLists.txt @@ -20,9 +20,7 @@ endif() if(USE_POCO_SQLODBC) target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_SQLODBC_LIBRARY} ${Poco_SQL_LIBRARY}) - if (NOT USE_INTERNAL_POCO_LIBRARY) - target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR} ${Poco_SQL_INCLUDE_DIR}) - endif() + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_SQLODBC_INCLUDE_DIR} ${Poco_SQL_INCLUDE_DIR}) endif() if(Poco_Data_FOUND) @@ -31,9 +29,7 @@ endif() if(USE_POCO_DATAODBC) target_link_libraries(clickhouse_dictionaries PRIVATE ${Poco_DataODBC_LIBRARY} ${Poco_Data_LIBRARY}) - if (NOT USE_INTERNAL_POCO_LIBRARY) - target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR}) - endif() + target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${ODBC_INCLUDE_DIRECTORIES} ${Poco_DataODBC_INCLUDE_DIR}) endif() if(USE_POCO_MONGODB) diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index b4dcaa49410..f210800d279 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -47,9 +47,11 @@ if (ENABLE_TESTS) endif () if (USE_EMBEDDED_COMPILER) - target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) + llvm_libs_all(REQUIRED_LLVM_LIBRARIES) + target_link_libraries(clickhouse_functions PRIVATE ${REQUIRED_LLVM_LIBRARIES}) + target_include_directories(clickhouse_functions SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS}) endif () -if (USE_BASE64) - target_include_directories (clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR}) +if(USE_BASE64) + target_include_directories(clickhouse_functions SYSTEM PRIVATE ${BASE64_INCLUDE_DIR}) endif() diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp index 2e94f8bd211..97b0af65f87 100644 --- a/dbms/src/Storages/StorageHDFS.cpp +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index b93a1638d48..fc23956ef4f 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp 
@@ -1,3 +1,5 @@ +#include "TableFunctionRemote.h" + #include #include #include @@ -8,10 +10,8 @@ #include #include #include - -#include +#include #include -#include namespace DB From 0ba1db8b2715d07ae6dd5360757ddffdb0a701da Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 11 Jan 2019 22:03:23 +0300 Subject: [PATCH 38/56] CLICKHOUSE-4249: Limit compiled expressions cache --- dbms/programs/server/Server.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 499f233ff28..f0fd80669c0 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -411,7 +411,7 @@ int Server::main(const std::vector & /*args*/) global_context->setMarkCache(mark_cache_size); #if USE_EMBEDDED_COMPILER - size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", std::numeric_limits::max()); + size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500); if (compiled_expression_cache_size) global_context->setCompiledExpressionCache(compiled_expression_cache_size); #endif From e098348aa4ca0d29f7b0f29459d37e3c54bf6752 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 20:14:17 +0300 Subject: [PATCH 39/56] more QueryNormalizer refactoring: split visit function --- dbms/src/Interpreters/QueryNormalizer.cpp | 407 ++++++++++++---------- dbms/src/Interpreters/QueryNormalizer.h | 16 +- 2 files changed, 231 insertions(+), 192 deletions(-) diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index ea6312f84e4..328b3f6bf6a 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -60,16 +60,213 @@ private: }; +void QueryNormalizer::visit(ASTFunction & node, const ASTPtr &, Data & data) +{ + auto & aliases = data.aliases; + String & func_name = node.name; + ASTPtr & func_arguments = node.arguments; + + /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. + if (functionIsInOrGlobalInOperator(func_name)) + if (ASTIdentifier * right = typeid_cast(func_arguments->children.at(1).get())) + if (!aliases.count(right->name)) + right->setSpecial(); + + /// Special cases for count function. + String func_name_lowercase = Poco::toLower(func_name); + if (startsWith(func_name_lowercase, "count")) + { + /// Select implementation of countDistinct based on settings. + /// Important that it is done as query rewrite. It means rewritten query + /// will be sent to remote servers during distributed query execution, + /// and on all remote servers, function implementation will be same. + if (endsWith(func_name, "Distinct") && func_name_lowercase == "countdistinct") + func_name = data.settings.count_distinct_implementation; + + /// As special case, treat count(*) as count(), not as count(list of all columns). + if (func_name_lowercase == "count" && func_arguments->children.size() == 1 + && typeid_cast(func_arguments->children[0].get())) + { + func_arguments->children.clear(); + } + } +} + +void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) +{ + auto & current_asts = data.current_asts; + String & current_alias = data.current_alias; + + if (!node.general()) + return; + + /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). 
+ auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end() && current_alias != node.name) + { + auto & alias_node = it_alias->second; + + /// Let's replace it with the corresponding tree node. + if (current_asts.count(alias_node.get())) + throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); + + String my_alias = ast->tryGetAlias(); + if (!my_alias.empty() && my_alias != alias_node->getAliasOrColumnName()) + { + /// Avoid infinite recursion here + auto replace_to_identifier = typeid_cast(alias_node.get()); + bool is_cycle = replace_to_identifier && replace_to_identifier->general() + && replace_to_identifier->name == node.name; + + if (!is_cycle) + { + /// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a. + ast = alias_node->clone(); + ast->setAlias(my_alias); + } + } + else + ast = alias_node; + } +} + +/// Replace *, alias.*, database.table.* with a list of columns. +void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data) +{ + auto & tables_with_columns = data.tables_with_columns; + + ASTs old_children; + if (data.processAsterisks()) + { + bool has_asterisk = false; + for (const auto & child : node.children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + { + has_asterisk = true; + break; + } + } + + if (has_asterisk) + { + old_children.swap(node.children); + node.children.reserve(old_children.size()); + } + } + + for (const auto & child : old_children) + { + if (typeid_cast(child.get())) + { + for (const auto & pr : tables_with_columns) + for (const auto & column_name : pr.second) + node.children.emplace_back(std::make_shared(column_name)); + } + else if (const auto * qualified_asterisk = typeid_cast(child.get())) + { + const ASTIdentifier * identifier = typeid_cast(qualified_asterisk->children[0].get()); + size_t num_components = identifier->children.size(); + + for (const auto & [table_name, table_columns] : tables_with_columns) + { + if ((num_components == 2 /// database.table.* + && !table_name.database.empty() /// This is normal (not a temporary) table. + && static_cast(*identifier->children[0]).name == table_name.database + && static_cast(*identifier->children[1]).name == table_name.table) + || (num_components == 0 /// t.* + && ((!table_name.table.empty() && identifier->name == table_name.table) /// table.* + || (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.* + { + for (const auto & column_name : table_columns) + node.children.emplace_back(std::make_shared(column_name)); + break; + } + } + } + else + node.children.emplace_back(child); + } +} + +/// mark table identifiers as 'not columns' +void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data &) +{ + if (node.table_expression) + { + auto & database_and_table_name = static_cast(*node.table_expression).database_and_table_name; + if (database_and_table_name) + if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) + right->setSpecial(); + } +} + +/// special visitChildren() for ASTSelectQuery +void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data) +{ + for (auto & child : ast->children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + continue; + + visit(child, data); + } + + /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, + /// but also in where_expression and having_expression. 
+ if (select.prewhere_expression) + visit(select.prewhere_expression, data); + if (select.where_expression) + visit(select.where_expression, data); + if (select.having_expression) + visit(select.having_expression, data); +} + +/// Don't go into subqueries. +/// Don't go into components of compound identifiers. +/// Don't go into select query. It processes children itself. +/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters +/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]). +void QueryNormalizer::visitChildren(const ASTPtr & node, Data & data) +{ + ASTFunction * func_node = typeid_cast(node.get()); + if (func_node && func_node->name == "lambda") + { + /// We skip the first argument. We also assume that the lambda function can not have parameters. + for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i) + { + auto & child = func_node->arguments->children[i]; + + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + continue; + + visit(child, data); + } + } + else if (!typeid_cast(node.get()) && + !typeid_cast(node.get())) + { + for (auto & child : node->children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + continue; + + visit(child, data); + } + } +} + void QueryNormalizer::visit(ASTPtr & ast, Data & data) { CheckASTDepth scope1(data); RestoreAliasOnExitScope scope2(data.current_alias); - auto & aliases = data.aliases; - auto & tables_with_columns = data.tables_with_columns; auto & finished_asts = data.finished_asts; auto & current_asts = data.current_asts; - String & current_alias = data.current_alias; if (finished_asts.count(ast)) { @@ -80,206 +277,34 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) ASTPtr initial_ast = ast; current_asts.insert(initial_ast.get()); - String my_alias = ast->tryGetAlias(); - if (!my_alias.empty()) - current_alias = my_alias; - - /// rewrite rules that act when you go from top to bottom. - bool replaced = false; - - ASTIdentifier * identifier_node = nullptr; - ASTFunction * func_node = nullptr; - - if ((func_node = typeid_cast(ast.get()))) { - /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. - if (functionIsInOrGlobalInOperator(func_node->name)) - if (ASTIdentifier * right = typeid_cast(func_node->arguments->children.at(1).get())) - if (!aliases.count(right->name)) - right->setSpecial(); - - /// Special cases for count function. - String func_name_lowercase = Poco::toLower(func_node->name); - if (startsWith(func_name_lowercase, "count")) - { - /// Select implementation of countDistinct based on settings. - /// Important that it is done as query rewrite. It means rewritten query - /// will be sent to remote servers during distributed query execution, - /// and on all remote servers, function implementation will be same. - if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct") - func_node->name = data.settings.count_distinct_implementation; - - /// As special case, treat count(*) as count(), not as count(list of all columns). 
- if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1 - && typeid_cast(func_node->arguments->children[0].get())) - { - func_node->arguments->children.clear(); - } - } + String my_alias = ast->tryGetAlias(); + if (!my_alias.empty()) + data.current_alias = my_alias; } - else if ((identifier_node = typeid_cast(ast.get()))) - { - if (identifier_node->general()) - { - /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). - auto it_alias = aliases.find(identifier_node->name); - if (it_alias != aliases.end() && current_alias != identifier_node->name) - { - /// Let's replace it with the corresponding tree node. - if (current_asts.count(it_alias->second.get())) - throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); - if (!my_alias.empty() && my_alias != it_alias->second->getAliasOrColumnName()) - { - /// Avoid infinite recursion here - auto replace_to_identifier = typeid_cast(it_alias->second.get()); - bool is_cycle = replace_to_identifier && replace_to_identifier->general() - && replace_to_identifier->name == identifier_node->name; - - if (!is_cycle) - { - /// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a. - ast = it_alias->second->clone(); - ast->setAlias(my_alias); - replaced = true; - } - } - else - { - ast = it_alias->second; - replaced = true; - } - } - } - } - else if (ASTExpressionList * expr_list = typeid_cast(ast.get())) - { - /// Replace *, alias.*, database.table.* with a list of columns. - - ASTs old_children; - if (data.processAsterisks()) - { - bool has_asterisk = false; - for (const auto & child : expr_list->children) - { - if (typeid_cast(child.get()) || - typeid_cast(child.get())) - { - has_asterisk = true; - break; - } - } - - if (has_asterisk) - { - old_children.swap(expr_list->children); - expr_list->children.reserve(old_children.size()); - } - } - - for (const auto & child : old_children) - { - if (typeid_cast(child.get())) - { - for (const auto & pr : tables_with_columns) - for (const auto & column_name : pr.second) - expr_list->children.emplace_back(std::make_shared(column_name)); - } - else if (const auto * qualified_asterisk = typeid_cast(child.get())) - { - const ASTIdentifier * identifier = typeid_cast(qualified_asterisk->children[0].get()); - size_t num_components = identifier->children.size(); - - for (const auto & [table_name, table_columns] : tables_with_columns) - { - if ((num_components == 2 /// database.table.* - && !table_name.database.empty() /// This is normal (not a temporary) table. 
- && static_cast(*identifier->children[0]).name == table_name.database - && static_cast(*identifier->children[1]).name == table_name.table) - || (num_components == 0 /// t.* - && ((!table_name.table.empty() && identifier->name == table_name.table) /// table.* - || (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.* - { - for (const auto & column_name : table_columns) - expr_list->children.emplace_back(std::make_shared(column_name)); - break; - } - } - } - else - expr_list->children.emplace_back(child); - } - } - else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast(ast.get())) - { - if (tables_elem->table_expression) - { - auto & database_and_table_name = static_cast(*tables_elem->table_expression).database_and_table_name; - if (database_and_table_name) - { - if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) - right->setSpecial(); - } - } - } + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. - if (replaced) - { + if (ast.get() != initial_ast.get()) visit(ast, data); - current_asts.erase(initial_ast.get()); - current_asts.erase(ast.get()); - finished_asts[initial_ast] = ast; - return; - } - - /// Recurring calls. Don't go into subqueries. Don't go into components of compound identifiers. - /// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters - /// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]). - - if (func_node && func_node->name == "lambda") - { - /// We skip the first argument. We also assume that the lambda function can not have parameters. - for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i) - { - auto & child = func_node->arguments->children[i]; - - if (typeid_cast(child.get()) || typeid_cast(child.get())) - continue; - - visit(child, data); - } - } - else if (identifier_node) - { - } else - { - for (auto & child : ast->children) - { - if (typeid_cast(child.get()) || typeid_cast(child.get())) - continue; - - visit(child, data); - } - } - - /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression. - if (ASTSelectQuery * select = typeid_cast(ast.get())) - { - if (select->prewhere_expression) - visit(select->prewhere_expression, data); - if (select->where_expression) - visit(select->where_expression, data); - if (select->having_expression) - visit(select->having_expression, data); - } + visitChildren(ast, data); current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; - /// @note can not place it in CheckASTDepth dror cause of throw. + /// @note can not place it in CheckASTDepth dtor cause of exception. 
if (data.level == 1) { try diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 55d33931d2f..3e55e0253e6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -18,6 +18,12 @@ inline bool functionIsInOrGlobalInOperator(const String & name) } +class ASTFunction; +class ASTIdentifier; +class ASTExpressionList; +struct ASTTablesInSelectQueryElement; + + class QueryNormalizer { /// Extracts settings, mostly to show which are used and which are not. @@ -76,7 +82,15 @@ public: private: Data & visitor_data; - void visit(ASTPtr & query, Data & data); + static void visit(ASTPtr & query, Data & data); + + static void visit(ASTIdentifier &, ASTPtr &, Data &); + static void visit(ASTFunction &, const ASTPtr &, Data &); + static void visit(ASTExpressionList &, const ASTPtr &, Data &); + static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); + static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + + static void visitChildren(const ASTPtr &, Data & data); }; } From b759219243400ca304de920935e3e767f64e9636 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 22:33:00 +0300 Subject: [PATCH 40/56] fix crash on wrong data --- dbms/src/Interpreters/ColumnNamesContext.h | 26 +++++++++++++--------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/ColumnNamesContext.h b/dbms/src/Interpreters/ColumnNamesContext.h index 63ad5a4e2e4..a605903580f 100644 --- a/dbms/src/Interpreters/ColumnNamesContext.h +++ b/dbms/src/Interpreters/ColumnNamesContext.h @@ -16,18 +16,21 @@ struct ColumnNamesContext { struct JoinedTable { - const ASTTableExpression * expr; - const ASTTableJoin * join; + const ASTTableExpression * expr = nullptr; + const ASTTableJoin * join = nullptr; std::optional alias() const { String alias; - if (expr->database_and_table_name) - alias = expr->database_and_table_name->tryGetAlias(); - else if (expr->table_function) - alias = expr->table_function->tryGetAlias(); - else if (expr->subquery) - alias = expr->subquery->tryGetAlias(); + if (expr) + { + if (expr->database_and_table_name) + alias = expr->database_and_table_name->tryGetAlias(); + else if (expr->table_function) + alias = expr->table_function->tryGetAlias(); + else if (expr->subquery) + alias = expr->subquery->tryGetAlias(); + } if (!alias.empty()) return alias; return {}; @@ -35,9 +38,10 @@ struct ColumnNamesContext std::optional name() const { - if (auto * node = expr->database_and_table_name.get()) - if (auto * identifier = typeid_cast(node)) - return identifier->name; + if (expr) + if (auto * node = expr->database_and_table_name.get()) + if (auto * identifier = typeid_cast(node)) + return identifier->name; return {}; } From 00200615e81c3b29039b8c30e30ec423cc47eb63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 12 Jan 2019 00:16:49 +0300 Subject: [PATCH 41/56] Trying to remove redundant objects from CompiledExpressionsCache (LLVMFunction), also remove CompiledExpressionsCacheBytes metric because it's not connected with real size --- dbms/src/Interpreters/AsynchronousMetrics.cpp | 3 - dbms/src/Interpreters/ExpressionJIT.cpp | 137 ++++++------------ dbms/src/Interpreters/ExpressionJIT.h | 21 ++- 3 files changed, 52 insertions(+), 109 deletions(-) diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 9fdcf1d4e13..283021cbeb6 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ 
b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -137,10 +137,7 @@ void AsynchronousMetrics::update() #if USE_EMBEDDED_COMPILER { if (auto compiled_expression_cache = context.getCompiledExpressionCache()) - { - set("CompiledExpressionCacheBytes", compiled_expression_cache->weight()); set("CompiledExpressionCacheCount", compiled_expression_cache->count()); - } } #endif diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 9ac95e3a107..6939274fadd 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -189,70 +189,36 @@ auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr) } #endif -#if LLVM_VERSION_MAJOR >= 6 -struct CountingMMapper final : public llvm::SectionMemoryManager::MemoryMapper -{ - MemoryTracker memory_tracker{VariableContext::Global}; - - llvm::sys::MemoryBlock allocateMappedMemory(llvm::SectionMemoryManager::AllocationPurpose /*purpose*/, - size_t num_bytes, - const llvm::sys::MemoryBlock * const near_block, - unsigned flags, - std::error_code & error_code) override - { - memory_tracker.alloc(num_bytes); - return llvm::sys::Memory::allocateMappedMemory(num_bytes, near_block, flags, error_code); - } - - std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & block, unsigned flags) override - { - return llvm::sys::Memory::protectMappedMemory(block, flags); - } - - std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & block) override - { - memory_tracker.free(block.size()); - return llvm::sys::Memory::releaseMappedMemory(block); - } -}; +#if LLVM_VERSION_MAJOR >= 7 +using ModulePtr = std::shared_ptr; +#else +using ModulePtr = std::shared_ptr; +#endif struct LLVMContext { - static inline std::atomic id_counter{0}; - llvm::LLVMContext context; + std::shared_ptr context; #if LLVM_VERSION_MAJOR >= 7 llvm::orc::ExecutionSession execution_session; - std::unique_ptr module; -#else - std::shared_ptr module; #endif + ModulePtr module; std::unique_ptr machine; -#if LLVM_VERSION_MAJOR >= 6 - std::unique_ptr memory_mapper; -#endif std::shared_ptr memory_manager; llvm::orc::RTDyldObjectLinkingLayer object_layer; llvm::orc::IRCompileLayer compile_layer; llvm::DataLayout layout; llvm::IRBuilder<> builder; std::unordered_map symbols; - size_t id; LLVMContext() + : context(std::make_shared()) #if LLVM_VERSION_MAJOR >= 7 - : module(std::make_unique("jit", context)) + , module(std::make_unique("jit", *context)) #else - : module(std::make_shared("jit", context)) + , module(std::make_shared("jit", *context)) #endif , machine(getNativeMachine()) - -#if LLVM_VERSION_MAJOR >= 6 - , memory_mapper(std::make_unique()) - , memory_manager(std::make_shared(memory_mapper.get())) -#else , memory_manager(std::make_shared()) -#endif #if LLVM_VERSION_MAJOR >= 7 , object_layer(execution_session, [this](llvm::orc::VModuleKey) { @@ -263,18 +229,17 @@ struct LLVMContext #endif , compile_layer(object_layer, llvm::orc::SimpleCompiler(*machine)) , layout(machine->createDataLayout()) - , builder(context) - , id(id_counter++) + , builder(*context) { module->setDataLayout(layout); module->setTargetTriple(machine->getTargetTriple().getTriple()); } - /// returns used memory - size_t compileAllFunctionsToNativeCode() + void compileAllFunctionsToNativeCode() { if (!module->size()) - return 0; + return; llvm::PassManagerBuilder pass_manager_builder; llvm::legacy::PassManager mpm; llvm::legacy::FunctionPassManager fpm(module.get()); @@ -323,26 +288,20 @@ struct LLVMContext throw Exception("Function " + name + " failed
to link", ErrorCodes::CANNOT_COMPILE_CODE); symbols[name] = reinterpret_cast(*address); } -#if LLVM_VERSION_MAJOR >= 6 - return memory_mapper->memory_tracker.get(); -#else - return 0; -#endif } }; class LLVMPreparedFunction : public PreparedFunctionImpl { std::string name; - std::shared_ptr context; void * function; public: - LLVMPreparedFunction(std::string name_, std::shared_ptr context) - : name(std::move(name_)), context(context) + LLVMPreparedFunction(const std::string & name_, const std::unordered_map & symbols) + : name(name_) { - auto it = context->symbols.find(name); - if (context->symbols.end() == it) + auto it = symbols.find(name); + if (symbols.end() == it) throw Exception("Cannot find symbol " + name + " in LLVMContext", ErrorCodes::LOGICAL_ERROR); function = it->second; } @@ -373,16 +332,16 @@ public: } }; -static void compileFunctionToLLVMByteCode(std::shared_ptr & context, const IFunctionBase & f) +static void compileFunctionToLLVMByteCode(LLVMContext & context, const IFunctionBase & f) { ProfileEvents::increment(ProfileEvents::CompileFunction); auto & arg_types = f.getArgumentTypes(); - auto & b = context->builder; + auto & b = context.builder; auto * size_type = b.getIntNTy(sizeof(size_t) * 8); auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type); auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, data_type->getPointerTo() }, /*isVarArg=*/false); - auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, f.getName(), context->module.get()); + auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, f.getName(), context.module.get()); auto args = func->args().begin(); llvm::Value * counter_arg = &*args++; llvm::Value * columns_arg = &*args++; @@ -490,6 +449,8 @@ static CompilableExpression subexpression(size_t i) return [=](llvm::IRBuilderBase &, const ValuePlaceholders & inputs) { return inputs[i](); }; } + + static CompilableExpression subexpression(const IFunctionBase & f, std::vector args) { return [&, args = std::move(args)](llvm::IRBuilderBase & builder, const ValuePlaceholders & inputs) @@ -504,12 +465,21 @@ static CompilableExpression subexpression(const IFunctionBase & f, std::vector context, const Block & sample_block) - : name(actions.back().result_name), context(context) +struct LLVMModuleState { + std::unordered_map symbols; + std::shared_ptr major_context; + std::shared_ptr memory_manager; +}; + +LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, const Block & sample_block) + : name(actions.back().result_name) + , module_state(std::make_unique()) +{ + LLVMContext context; for (const auto & c : sample_block) /// TODO: implement `getNativeValue` for all types & replace the check with `c.column && toNativeType(...)` - if (c.column && getNativeValue(toNativeType(context->builder, c.type), *c.column, 0)) + if (c.column && getNativeValue(toNativeType(context.builder, c.type), *c.column, 0)) subexpressions[c.name] = subexpression(c.column, c.type); for (const auto & action : actions) { @@ -530,6 +500,11 @@ LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, std::shar originals.push_back(action.function_base); } compileFunctionToLLVMByteCode(context, *this); + context.compileAllFunctionsToNativeCode(); + + module_state->symbols = context.symbols; + module_state->major_context = context.context; + module_state->memory_manager = context.memory_manager; } llvm::Value * LLVMFunction::compile(llvm::IRBuilderBase & builder, 
ValuePlaceholders values) const @@ -540,8 +515,7 @@ llvm::Value * LLVMFunction::compile(llvm::IRBuilderBase & builder, ValuePlacehol return it->second(builder, values); } - -PreparedFunctionPtr LLVMFunction::prepare(const Block &, const ColumnNumbers &, size_t) const { return std::make_shared(name, context); } +PreparedFunctionPtr LLVMFunction::prepare(const Block &, const ColumnNumbers &, size_t) const { return std::make_shared(name, module_state->symbols); } bool LLVMFunction::isDeterministic() const { @@ -622,28 +596,6 @@ static bool isCompilable(const IFunctionBase & function) return function.isCompilable(); } -size_t CompiledExpressionCache::weight() const -{ - -#if LLVM_VERSION_MAJOR >= 6 - std::lock_guard lock(mutex); - size_t result{0}; - std::unordered_set seen; - for (const auto & cell : cells) - { - auto function_context = cell.second.value->getContext(); - if (!seen.count(function_context->id)) - { - result += function_context->memory_mapper->memory_tracker.get(); - seen.insert(function_context->id); - } - } - return result; -#else - return Base::weight(); -#endif -} - std::vector>> getActionsDependents(const ExpressionActions::Actions & actions, const Names & output_columns) { /// an empty optional is a poisoned value prohibiting the column's producer from being removed @@ -748,21 +700,16 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&inlined_func=std::as_const(fused[i]), &sample_block] () { Stopwatch watch; - std::shared_ptr context = std::make_shared(); - auto result_fn = std::make_shared(inlined_func, context, sample_block); - size_t used_memory = context->compileAllFunctionsToNativeCode(); - ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory); + std::shared_ptr result_fn; + result_fn = std::make_shared(inlined_func, sample_block); ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); return result_fn; }); } else { - std::shared_ptr context = std::make_shared(); Stopwatch watch; - fn = std::make_shared(fused[i], context, sample_block); - size_t used_memory = context->compileAllFunctionsToNativeCode(); - ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory); + fn = std::make_shared(fused[i], sample_block); ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); } diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index f5ec420f37c..33a3ac56845 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -14,19 +14,23 @@ namespace DB { -struct LLVMContext; using CompilableExpression = std::function; +struct LLVMModuleState; + class LLVMFunction : public IFunctionBase { std::string name; Names arg_names; DataTypes arg_types; - std::shared_ptr context; + std::vector originals; - std::unordered_map subexpressions; + std::unordered_map subexpressions; + + std::unique_ptr module_state; + public: - LLVMFunction(const ExpressionActions::Actions & actions, std::shared_ptr context, const Block & sample_block); + LLVMFunction(const ExpressionActions::Actions & actions, const Block & sample_block); bool isCompilable() const override { return true; } @@ -54,8 +58,7 @@ public: Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override; - std::shared_ptr getContext() const { return context; } - + const 
LLVMModuleState * getLLVMModuleState() const { return module_state.get(); } }; /** This child of LRUCache breaks one of it's invariants: total weight may be changed after insertion. @@ -63,13 +66,9 @@ public: */ class CompiledExpressionCache : public LRUCache { -private: - using Base = LRUCache; - public: + using Base = LRUCache; using Base::Base; - - size_t weight() const; }; /// For each APPLY_FUNCTION action, try to compile the function to native code; if the only uses of a compilable From 748bcb0a3c8cd0bcd7fdf57bf4573ce68a7f5cb7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 12 Jan 2019 00:18:57 +0300 Subject: [PATCH 42/56] Remove accident changes --- dbms/src/Interpreters/ExpressionJIT.cpp | 2 -- dbms/src/Interpreters/ExpressionJIT.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 6939274fadd..f41be9d6b97 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -449,8 +449,6 @@ static CompilableExpression subexpression(size_t i) return [=](llvm::IRBuilderBase &, const ValuePlaceholders & inputs) { return inputs[i](); }; } - - static CompilableExpression subexpression(const IFunctionBase & f, std::vector args) { return [&, args = std::move(args)](llvm::IRBuilderBase & builder, const ValuePlaceholders & inputs) diff --git a/dbms/src/Interpreters/ExpressionJIT.h b/dbms/src/Interpreters/ExpressionJIT.h index 33a3ac56845..f8c638cd03e 100644 --- a/dbms/src/Interpreters/ExpressionJIT.h +++ b/dbms/src/Interpreters/ExpressionJIT.h @@ -25,7 +25,7 @@ class LLVMFunction : public IFunctionBase DataTypes arg_types; std::vector originals; - std::unordered_map subexpressions; + std::unordered_map subexpressions; std::unique_ptr module_state; From a9550c5a43f374bb954d97d1662beec48feea2df Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 12 Jan 2019 10:19:18 +0300 Subject: [PATCH 43/56] Update ExpressionJIT.cpp --- dbms/src/Interpreters/ExpressionJIT.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index f41be9d6b97..223594db9b2 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -190,7 +190,7 @@ auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr) #endif #if LLVM_VERSION_MAJOR >= 7 -using ModulePtr = std::shared_ptr; +using ModulePtr = std::unique_ptr; #else using ModulePtr = std::shared_ptr; #endif From 974e1cbe6fdf7b8ea4f7d1d7dab69053c1c3ba2c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sun, 13 Jan 2019 22:44:24 +0300 Subject: [PATCH 44/56] Update array.md --- docs/en/data_types/array.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/data_types/array.md b/docs/en/data_types/array.md index 552c549b622..3338d2648d2 100644 --- a/docs/en/data_types/array.md +++ b/docs/en/data_types/array.md @@ -2,7 +2,7 @@ Array of `T`-type items. -`T` can be anything, including an array. Use multi-dimensional arrays with caution. ClickHouse has limited support for multi-dimensional arrays. For example, they can't be stored in `MergeTree` tables. +`T` can be anything, including an array. 
## Creating an array From 0a334b2dde269f8a2da38d059fd60d3bfb005e39 Mon Sep 17 00:00:00 2001 From: ogorbacheva Date: Mon, 14 Jan 2019 10:04:00 +0300 Subject: [PATCH 45/56] Adding description of the CHECK TABLE query to the 'ru' doc (#4043) * translation for CHECK TABLE query to russian * fix link to nowhere --- docs/ru/operations/utils/clickhouse-local.md | 2 +- docs/ru/query_language/misc.md | 323 +++++++++++-------- 2 files changed, 187 insertions(+), 138 deletions(-) diff --git a/docs/ru/operations/utils/clickhouse-local.md b/docs/ru/operations/utils/clickhouse-local.md index b43eb3600f7..dd3e21d9ce1 100644 --- a/docs/ru/operations/utils/clickhouse-local.md +++ b/docs/ru/operations/utils/clickhouse-local.md @@ -1,7 +1,7 @@ # clickhouse-local -Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../../query_language/index.md#queries) ClickHouse. +Принимает на вход данные, которые можно представить в табличном виде и выполняет над ними операции, заданные на [языке запросов](../../query_language/index.md) ClickHouse. `clickhouse-local` использует движок сервера ClickHouse, т.е. поддерживает все форматы данных и движки таблиц, с которыми работает ClickHouse, при этом для выполнения операций не требуется запущенный сервер. diff --git a/docs/ru/query_language/misc.md b/docs/ru/query_language/misc.md index 050d092befa..162648a8eef 100644 --- a/docs/ru/query_language/misc.md +++ b/docs/ru/query_language/misc.md @@ -1,40 +1,69 @@ -## ATTACH {#queries} +# Прочие виды запросов + +## ATTACH + Запрос полностью аналогичен запросу `CREATE`, но: - вместо слова `CREATE` используется слово `ATTACH`; -- запрос не создаёт данные на диске, а предполагает, что данные уже лежат в соответствующих местах, и всего лишь добавляет информацию о таблице в сервер. +- запрос не создаёт данные на диске, а предполагает, что данные уже лежат в соответствующих местах, и всего лишь добавляет информацию о таблице на сервер. После выполнения запроса `ATTACH` сервер будет знать о существовании таблицы. -После выполнения `ATTACH`, сервер будет знать о существовании таблицы. +Если таблица перед этим была отсоединена (`DETACH`), т.е. её структура известна, можно использовать сокращенную форму записи без определения структуры. -Если таблица перед этим была отсоединена (`DETACH`), т.е. её структура известна, то можно использовать сокращенную форму записи без определения структуры. - -``` sql +```sql ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster] ``` -Этот запрос используется при старте сервера. Сервер хранит метаданные таблиц в виде файлов с запросами `ATTACH`, которые он просто исполняет при запуске (за исключением системных таблиц, создание которых явно вписано в сервер). +Этот запрос используется при старте сервера. Сервер хранит метаданные таблиц в виде файлов с запросами `ATTACH`, которые он просто исполняет при запуске (за исключением системных таблиц, которые явно создаются на сервере). -## DROP -Запрос имеет два вида: `DROP DATABASE` и `DROP TABLE`. +## CHECK TABLE -``` sql -DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] +Проверяет таблицу на повреждение данных. + +```sql +CHECK TABLE [db.]name ``` -Удаляет все таблицы внутри базы данных db, а затем саму базу данных db. -Если указано `IF EXISTS` - не выдавать ошибку, если база данных не существует. +Запрос `CHECK TABLE` сравнивает текущие размеры файлов (в которых хранятся данные из колонок) с ожидаемыми значениями. 
Если значения не совпадают, данные в таблице считаются поврежденными. Искажение возможно, например, из-за сбоя при записи данных. -``` sql -DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +Ответ содержит колонку `result`, содержащую одну строку с типом [Boolean](../data_types/boolean.md). Допустимые значения: + +- 0 - данные в таблице повреждены; +- 1 - данные не повреждены. + +Запрос `CHECK TABLE` поддерживается только для следующих движков: + +- [Log](../operations/table_engines/log.md) +- [TinyLog](../operations/table_engines/tinylog.md) +- StripeLog + +В этих движках не предусмотрено автоматическое восстановление данных после сбоя. Поэтому используйте запрос `CHECK TABLE`, чтобы своевременно выявить повреждение данных. + +Обратите внимание, высокая защита целостности данных обеспечивается в таблицах семейства [MergeTree](../operations/table_engines/mergetree.md). Для избежания потери данных рекомендуется использовать именно эти таблицы. + +**Что делать, если данные повреждены** + +В этом случае можно скопировать оставшиеся неповрежденные данные в другую таблицу. Для этого: + +1. Создайте новую таблицу с такой же структурой, как у поврежденной таблицы. Для этого выполните запрос `CREATE TABLE AS `. +2. Установите значение параметра [max_threads](../operations/settings/settings.md#settings-max_threads) в 1. Это нужно для того, чтобы выполнить следующий запрос в одном потоке. Установить значение параметра можно через запрос: `SET max_threads = 1`. +3. Выполните запрос `INSERT INTO SELECT * FROM `. В результате неповрежденные данные будут скопированы в другую таблицу. Обратите внимание, будут скопированы только те данные, которые следуют до поврежденного участка. +4. Перезапустите `clickhouse-client`, чтобы вернуть предыдущее значение параметра `max_threads`. + +## DESCRIBE TABLE + +```sql +DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] ``` -Удаляет таблицу. -Если указано `IF EXISTS` - не выдавать ошибку, если таблица не существует или база данных не существует. +Возвращает два столбца: `name`, `type` типа `String`, в которых описаны имена и типы столбцов указанной таблицы. + +Вложенные структуры данных выводятся в "развёрнутом" виде. То есть, каждый столбец - по отдельности, с именем через точку. ## DETACH + Удаляет из сервера информацию о таблице name. Сервер перестаёт знать о существовании таблицы. -``` sql +```sql DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` @@ -43,139 +72,35 @@ DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] Запроса `DETACH DATABASE` нет. -## RENAME -Переименовывает одну или несколько таблиц. +## DROP -``` sql -RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster] +Запрос имеет два вида: `DROP DATABASE` и `DROP TABLE`. + +```sql +DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] ``` -Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка). +Удаляет все таблицы внутри базы данных db, а затем саму базу данных db. +Если указано `IF EXISTS` - не выдавать ошибку, если база данных не существует. -## SHOW DATABASES - -``` sql -SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] +```sql +DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` -Выводит список всех баз данных. 
-Запрос полностью аналогичен запросу `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. - -Смотрите также раздел "Форматы". - -## SHOW TABLES - -``` sql -SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] -``` - -Выводит список таблиц - -- из текущей БД или из БД db, если указано FROM db; -- всех, или имя которых соответствует шаблону pattern, если указано LIKE 'pattern'; - -Запрос полностью аналогичен запросу: `SELECT name FROM system.tables WHERE database = 'db' [AND name LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]`. - -Смотрите также раздел "Оператор LIKE". - -## SHOW PROCESSLIST - -``` sql -SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] -``` - -Выводит список запросов, выполняющихся в данный момент времени, кроме запросов `SHOW PROCESSLIST`. - -Выдаёт таблицу, содержащую столбцы: - -**user** - пользователь, под которым был задан запрос. Следует иметь ввиду, что при распределённой обработке запроса на удалённые серверы запросы отправляются под пользователем default. И SHOW PROCESSLIST показывает имя пользователя для конкретного запроса, а не для запроса, который данный запрос инициировал. - -**address** - имя хоста, с которого был отправлен запрос. При распределённой обработке запроса на удалённых серверах — это имя хоста-инициатора запроса. Чтобы проследить, откуда был задан распределённый запрос изначально, следует смотреть SHOW PROCESSLIST на сервере-инициаторе запроса. - -**elapsed** - время выполнения запроса, в секундах. Запросы выводятся упорядоченными по убыванию времени выполнения. - -**rows_read**, **bytes_read** - сколько было прочитано строк, байт несжатых данных при обработке запроса. При распределённой обработке запроса суммируются данные со всех удалённых серверов. Именно эти данные используются для ограничений и квот. - -**memory_usage** - текущее потребление оперативки в байтах. Смотрите настройку max_memory_usage. - -**query** - сам запрос. В запросах INSERT данные для вставки не выводятся. - -**query_id** - идентификатор запроса. Непустой, только если был явно задан пользователем. При распределённой обработке запроса идентификатор запроса не передаётся на удалённые серверы. - -Запрос полностью аналогичен запросу: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`. - -Полезный совет (выполните в консоли): - -```bash -watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" -``` - -## SHOW CREATE TABLE - -``` sql -SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] -``` - -Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица. - -## DESCRIBE TABLE - -``` sql -DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] -``` - -Возвращает два столбца: `name`, `type` типа `String`, в которых описаны имена и типы столбцов указанной таблицы. - -Вложенные структуры данных выводятся в "развёрнутом" виде. То есть, каждый столбец - по отдельности, с именем через точку. +Удаляет таблицу. +Если указано `IF EXISTS` - не выдавать ошибку, если таблица не существует или база данных не существует. ## EXISTS -``` sql +```sql EXISTS [TEMPORARY] TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] ``` Возвращает один столбец типа `UInt8`, содержащий одно значение - `0`, если таблицы или БД не существует и `1`, если таблица в указанной БД существует. -## USE - -``` sql -USE db -``` - -Позволяет установить текущую базу данных для сессии. 
-Текущая база данных используется для поиска таблиц, если база данных не указана в запросе явно через точку перед именем таблицы. -При использовании HTTP протокола, запрос не может быть выполнен, так как понятия сессии не существует. - -## SET - -``` sql -SET param = value -``` - -Позволяет установить настройку `param` в значение `value`. Также можно одним запросом установить все настройки из заданного профиля настроек - для этого, укажите в качестве имени настройки profile. Подробнее смотри раздел "Настройки". -Настройка устанавливается на сессию, или на сервер (глобально), если указано `GLOBAL`. -При установке глобальной настройки, настройка на все уже запущенные сессии, включая текущую сессию, не устанавливается, а будет использована только для новых сессий. - -При перезапуске сервера, теряются настройки, установленные с помощью `SET`. -Установить настройки, которые переживут перезапуск сервера, можно только с помощью конфигурационного файла сервера. - -## OPTIMIZE - -``` sql -OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] -``` - -Просит движок таблицы сделать что-нибудь, что может привести к более оптимальной работе. -Поддерживается только движками `*MergeTree`, в котором выполнение этого запроса инициирует внеочередное слияние кусков данных. -Если указан `PARTITION`, то оптимизация будет производиться только для указаной партиции. -Если указан `FINAL`, то оптимизация будет производиться даже когда все данные уже лежат в одном куске. - -!!! warning "Внимание" - Запрос OPTIMIZE не может устранить причину появления ошибки "Too many parts". - ## KILL QUERY -``` sql +```sql KILL QUERY [ON CLUSTER cluster] WHERE <where expression to SELECT FROM system.processes query> [SYNC|ASYNC|TEST] @@ -185,8 +110,9 @@ KILL QUERY [ON CLUSTER cluster] Пытается принудительно остановить исполняющиеся в данный момент запросы. Запросы для принудительной остановки выбираются из таблицы system.processes с помощью условия, указанного в секции `WHERE` запроса `KILL`. -Примеры: -``` sql +Примеры + +```sql -- Принудительно останавливает все запросы с указанным query_id: KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90' @@ -208,3 +134,126 @@ Readonly-пользователи могут останавливать толь Тестовый вариант запроса (`TEST`) только проверяет права пользователя и выводит список запросов для остановки. [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/misc/) + +## OPTIMIZE + +```sql +OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] +``` + +Просит движок таблицы сделать что-нибудь, что может привести к более оптимальной работе. +Поддерживается только движками `*MergeTree`, в котором выполнение этого запроса инициирует внеочередное слияние кусков данных. +Если указан `PARTITION`, то оптимизация будет производиться только для указанной партиции. +Если указан `FINAL`, то оптимизация будет производиться даже когда все данные уже лежат в одном куске. + +!!! warning "Внимание" + Запрос OPTIMIZE не может устранить причину появления ошибки "Too many parts". + +## RENAME + +Переименовывает одну или несколько таблиц. + +```sql +RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster] +``` + +Все таблицы переименовываются под глобальной блокировкой. Переименовывание таблицы является лёгкой операцией. Если вы указали после TO другую базу данных, то таблица будет перенесена в эту базу данных. При этом, директории с базами данных должны быть расположены в одной файловой системе (иначе возвращается ошибка).
+ +## SET + +```sql +SET param = value +``` + +Позволяет установить настройку `param` в значение `value`. Также можно одним запросом установить все настройки из заданного профиля настроек. Для этого укажите 'profile' в качестве имени настройки. Подробнее смотрите в разделе "Настройки". +Настройка устанавливается на сессию, или на сервер (глобально), если указано `GLOBAL`. +При установке глобальных настроек, эти настройки не применяются к уже запущенной сессии, включая текущую сессию. Она будет использована только для новых сессий. + +При перезапуске сервера теряются настройки, установленные с помощью `SET`. +Установить настройки, которые переживут перезапуск сервера, можно только с помощью конфигурационного файла сервера. + +## SHOW CREATE TABLE + +```sql +SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] +``` + +Возвращает один столбец statement типа `String`, содержащий одно значение - запрос `CREATE`, с помощью которого создана указанная таблица. + +## SHOW DATABASES + +```sql +SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] +``` + +Выводит список всех баз данных. +Запрос полностью аналогичен запросу `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. + +Смотрите также раздел "Форматы". + +## SHOW PROCESSLIST + +```sql +SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] +``` + +Выводит список запросов, выполняющихся в данный момент времени, кроме запросов `SHOW PROCESSLIST`. + +Выдаёт таблицу, содержащую столбцы: + +**user** - пользователь, под которым был задан запрос. Следует иметь ввиду, что при распределённой обработке запроса на удалённые серверы запросы отправляются под пользователем 'default'. И SHOW PROCESSLIST показывает имя пользователя для конкретного запроса, а не для запроса, который данный запрос инициировал. + +**address** - имя хоста, с которого был отправлен запрос. При распределённой обработке запроса на удалённых серверах — это имя хоста-инициатора запроса. Чтобы проследить, откуда был задан распределённый запрос изначально, следует смотреть SHOW PROCESSLIST на сервере-инициаторе запроса. + +**elapsed** - время выполнения запроса, в секундах. Запросы выводятся в порядке убывания времени выполнения. + +**rows_read**, **bytes_read** - сколько было прочитано строк, байт несжатых данных при обработке запроса. При распределённой обработке запроса суммируются данные со всех удалённых серверов. Именно эти данные используются для ограничений и квот. + +**memory_usage** - текущее потребление оперативки в байтах. Смотрите настройку 'max_memory_usage'. + +**query** - сам запрос. В запросах INSERT данные для вставки не выводятся. + +**query_id** - идентификатор запроса. Непустой, только если был явно задан пользователем. При распределённой обработке запроса идентификатор запроса не передаётся на удалённые серверы. + +Запрос полностью аналогичен запросу: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`. + +Полезный совет (выполните в консоли): + +```bash +watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" +``` + +## SHOW TABLES + +```sql +SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] +``` + +Выводит список таблиц: + +- из текущей базы данных или из базы db, если указано `FROM db`; +- всех, или имя которых соответствует шаблону pattern, если указано `LIKE 'pattern'`; + +Запрос полностью аналогичен запросу: `SELECT name FROM system.tables WHERE database = 'db' [AND name LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]`. 
+Смотрите также раздел "Оператор LIKE". + +## TRUNCATE + +```sql +TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Удаляет все данные из таблицы. Если условие `IF EXISTS` не указано, запрос вернет ошибку, если таблицы не существует. + +Запрос `TRUNCATE` не поддерживается для следующих движков: [View](../operations/table_engines/view.md), [File](../operations/table_engines/file.md), [URL](../operations/table_engines/url.md) и [Null](../operations/table_engines/null.md). + +## USE + +```sql +USE db +``` + +Позволяет установить текущую базу данных для сессии. +Текущая база данных используется для поиска таблиц, если база данных не указана в запросе явно через точку перед именем таблицы. +При использовании HTTP протокола запрос не может быть выполнен, так как понятия сессии не существует. + From 9c35598373cd17755b52768be4ce75abd4851bea Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 27 Dec 2018 16:27:01 +0300 Subject: [PATCH 46/56] Add cppkafka to contrib --- .gitmodules | 3 + cmake/find_rdkafka.cmake | 4 +- contrib/CMakeLists.txt | 1 + contrib/cppkafka | 1 + contrib/cppkafka-cmake/CMakeLists.txt | 31 ++++++ contrib/librdkafka-cmake/CMakeLists.txt | 104 +++++++++--------- .../include/librdkafka/rdkafka.h | 5 + dbms/CMakeLists.txt | 1 + dbms/src/Storages/Kafka/StorageKafka.cpp | 36 +++--- dbms/src/Storages/Kafka/StorageKafka.h | 12 +- 10 files changed, 120 insertions(+), 78 deletions(-) create mode 160000 contrib/cppkafka create mode 100644 contrib/cppkafka-cmake/CMakeLists.txt create mode 100644 contrib/librdkafka-cmake/include/librdkafka/rdkafka.h diff --git a/.gitmodules b/.gitmodules index 923554a1532..24211b6707e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "contrib/libgsasl"] path = contrib/libgsasl url = https://github.com/ClickHouse-Extras/libgsasl.git +[submodule "contrib/cppkafka"] + path = contrib/cppkafka + url = https://github.com/mfontanini/cppkafka.git diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index 9ba48cadfcd..b0a0a98b382 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -25,6 +25,7 @@ endif () if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR) set (USE_RDKAFKA 1) set (RDKAFKA_LIBRARY ${RDKAFKA_LIB} ${OPENSSL_LIBRARIES}) + set (CPPKAFKA_LIBRARY cppkafka) if (SASL2_LIBRARY) list (APPEND RDKAFKA_LIBRARY ${SASL2_LIBRARY}) endif () @@ -35,9 +36,10 @@ elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT ARCH_ARM) set (USE_INTERNAL_RDKAFKA_LIBRARY 1) set (RDKAFKA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src") set (RDKAFKA_LIBRARY rdkafka) + set (CPPKAFKA_LIBRARY cppkafka) set (USE_RDKAFKA 1) endif () endif () -message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY}") +message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9d964f288d8..25ad30e02eb 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -121,6 +121,7 @@ endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) add_subdirectory (librdkafka-cmake) + add_subdirectory (cppkafka-cmake) target_include_directories(rdkafka BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) endif () diff --git a/contrib/cppkafka b/contrib/cppkafka new file mode 160000 index 00000000000..520465510ef --- /dev/null +++ b/contrib/cppkafka @@ -0,0 +1 @@ +Subproject commit 520465510efef7704346cf8d140967c4abb057c1 diff --git
a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f62fa471172 --- /dev/null +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -0,0 +1,31 @@ +set(CPPKAFKA_DIR ${CMAKE_SOURCE_DIR}/contrib/cppkafka) + +set(SRCS + ${CPPKAFKA_DIR}/src/configuration.cpp + ${CPPKAFKA_DIR}/src/topic_configuration.cpp + ${CPPKAFKA_DIR}/src/configuration_option.cpp + ${CPPKAFKA_DIR}/src/exceptions.cpp + ${CPPKAFKA_DIR}/src/topic.cpp + ${CPPKAFKA_DIR}/src/buffer.cpp + ${CPPKAFKA_DIR}/src/queue.cpp + ${CPPKAFKA_DIR}/src/message.cpp + ${CPPKAFKA_DIR}/src/message_timestamp.cpp + ${CPPKAFKA_DIR}/src/message_internal.cpp + ${CPPKAFKA_DIR}/src/topic_partition.cpp + ${CPPKAFKA_DIR}/src/topic_partition_list.cpp + ${CPPKAFKA_DIR}/src/metadata.cpp + ${CPPKAFKA_DIR}/src/group_information.cpp + ${CPPKAFKA_DIR}/src/error.cpp + ${CPPKAFKA_DIR}/src/event.cpp + + ${CPPKAFKA_DIR}/src/kafka_handle_base.cpp + ${CPPKAFKA_DIR}/src/producer.cpp + ${CPPKAFKA_DIR}/src/consumer.cpp +) + +add_library(cppkafka ${LINK_MODE} ${SRCS}) + +target_link_libraries(cppkafka ${RDKAFKA_LIBRARY}) +target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka) +target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include) diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 90421cfb31d..3b35634dabc 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -1,60 +1,60 @@ set(RDKAFKA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/librdkafka/src) set(SRCS -${RDKAFKA_SOURCE_DIR}/crc32c.c -${RDKAFKA_SOURCE_DIR}/rdaddr.c -${RDKAFKA_SOURCE_DIR}/rdavl.c -${RDKAFKA_SOURCE_DIR}/rdbuf.c -${RDKAFKA_SOURCE_DIR}/rdcrc32.c -${RDKAFKA_SOURCE_DIR}/rdkafka.c -${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c -${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c -${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c -${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c -${RDKAFKA_SOURCE_DIR}/rdkafka_event.c -${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c -${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c -${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c -${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c -${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c -${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c -${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c -${RDKAFKA_SOURCE_DIR}/rdkafka_offset.c -${RDKAFKA_SOURCE_DIR}/rdkafka_op.c -${RDKAFKA_SOURCE_DIR}/rdkafka_partition.c -${RDKAFKA_SOURCE_DIR}/rdkafka_pattern.c -${RDKAFKA_SOURCE_DIR}/rdkafka_queue.c -${RDKAFKA_SOURCE_DIR}/rdkafka_range_assignor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_request.c -${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c -${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c -${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c -${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c -${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c -${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c -${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_header.c -${RDKAFKA_SOURCE_DIR}/rdlist.c -${RDKAFKA_SOURCE_DIR}/rdlog.c -${RDKAFKA_SOURCE_DIR}/rdmurmur2.c -${RDKAFKA_SOURCE_DIR}/rdports.c -${RDKAFKA_SOURCE_DIR}/rdrand.c -${RDKAFKA_SOURCE_DIR}/rdregex.c -${RDKAFKA_SOURCE_DIR}/rdstring.c -${RDKAFKA_SOURCE_DIR}/rdunittest.c -${RDKAFKA_SOURCE_DIR}/rdvarint.c -${RDKAFKA_SOURCE_DIR}/snappy.c -${RDKAFKA_SOURCE_DIR}/tinycthread.c -${RDKAFKA_SOURCE_DIR}/xxhash.c -${RDKAFKA_SOURCE_DIR}/lz4.c -${RDKAFKA_SOURCE_DIR}/lz4frame.c -${RDKAFKA_SOURCE_DIR}/lz4hc.c 
-${RDKAFKA_SOURCE_DIR}/rdgz.c + ${RDKAFKA_SOURCE_DIR}/crc32c.c + ${RDKAFKA_SOURCE_DIR}/rdaddr.c + ${RDKAFKA_SOURCE_DIR}/rdavl.c + ${RDKAFKA_SOURCE_DIR}/rdbuf.c + ${RDKAFKA_SOURCE_DIR}/rdcrc32.c + ${RDKAFKA_SOURCE_DIR}/rdkafka.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_event.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_offset.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_op.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_partition.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_pattern.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_queue.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_range_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_request.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_header.c + ${RDKAFKA_SOURCE_DIR}/rdlist.c + ${RDKAFKA_SOURCE_DIR}/rdlog.c + ${RDKAFKA_SOURCE_DIR}/rdmurmur2.c + ${RDKAFKA_SOURCE_DIR}/rdports.c + ${RDKAFKA_SOURCE_DIR}/rdrand.c + ${RDKAFKA_SOURCE_DIR}/rdregex.c + ${RDKAFKA_SOURCE_DIR}/rdstring.c + ${RDKAFKA_SOURCE_DIR}/rdunittest.c + ${RDKAFKA_SOURCE_DIR}/rdvarint.c + ${RDKAFKA_SOURCE_DIR}/snappy.c + ${RDKAFKA_SOURCE_DIR}/tinycthread.c + ${RDKAFKA_SOURCE_DIR}/xxhash.c + ${RDKAFKA_SOURCE_DIR}/lz4.c + ${RDKAFKA_SOURCE_DIR}/lz4frame.c + ${RDKAFKA_SOURCE_DIR}/lz4hc.c + ${RDKAFKA_SOURCE_DIR}/rdgz.c ) add_library(rdkafka ${LINK_MODE} ${SRCS}) -target_include_directories(rdkafka PRIVATE include) +target_include_directories(rdkafka SYSTEM PUBLIC include) target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) diff --git a/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h b/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h new file mode 100644 index 00000000000..3387659281a --- /dev/null +++ b/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h @@ -0,0 +1,5 @@ +#if __has_include() // maybe bundled +# include_next // Y_IGNORE +#else // system +# include_next +#endif diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 7e39fd2f7af..84099810164 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -287,6 +287,7 @@ endif () if (USE_RDKAFKA) target_link_libraries (dbms PRIVATE ${RDKAFKA_LIBRARY}) + target_link_libraries (dbms PRIVATE ${CPPKAFKA_LIBRARY}) if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) endif () diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index e6ccf544ba1..f855ea7e877 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -1,39 +1,35 @@ -#include -#include +#include + #if USE_RDKAFKA -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include 
#include +#include +#include #include #include #include +#include #include #include #include -#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#if __has_include() // maybe bundled -#include // Y_IGNORE -#else // system -#include -#endif +#include namespace DB diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index 561349ac474..e7cce510166 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -1,16 +1,18 @@ #pragma once + #include + #if USE_RDKAFKA -#include - -#include -#include #include -#include +#include #include +#include #include #include +#include + +#include struct rd_kafka_s; struct rd_kafka_conf_s; From 337c092c7e152ccc1bc80ea006412c1b878b8bb5 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 27 Dec 2018 17:18:20 +0300 Subject: [PATCH 47/56] Use cppkafka instead of raw C interface --- dbms/src/Storages/Kafka/StorageKafka.cpp | 173 +++++------------------ dbms/src/Storages/Kafka/StorageKafka.h | 24 +--- 2 files changed, 42 insertions(+), 155 deletions(-) diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index f855ea7e877..063fff81e2d 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -29,8 +29,6 @@ #include #include -#include - namespace DB { @@ -58,8 +56,8 @@ static const String CONFIG_PREFIX = "kafka"; class ReadBufferFromKafkaConsumer : public ReadBuffer { - rd_kafka_t * consumer; - rd_kafka_message_t * current = nullptr; + ConsumerPtr consumer; + cppkafka::Message current; bool current_pending = false; /// We've fetched "current" message and need to process it on the next iteration. Poco::Logger * log; size_t read_messages = 0; @@ -69,42 +67,36 @@ class ReadBufferFromKafkaConsumer : public ReadBuffer { if (current_pending) { - BufferBase::set(reinterpret_cast(current->payload), current->len, 0); + // XXX: very fishy place with const casting. + BufferBase::set(reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); current_pending = false; return true; } // Process next buffered message - rd_kafka_message_t * msg = rd_kafka_consumer_poll(consumer, READ_POLL_MS); // XXX: use RAII. - if (msg == nullptr) + auto message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); + if (!message) return false; - if (msg->err) + if (message.is_eof()) { - if (msg->err != RD_KAFKA_RESP_ERR__PARTITION_EOF) - { - LOG_ERROR(log, "Consumer error: " << rd_kafka_err2str(msg->err) << " " << rd_kafka_message_errstr(msg)); - rd_kafka_message_destroy(msg); - return false; - } - - // Reach EOF while reading current batch, skip it - LOG_TRACE(log, "EOF reached for partition " << msg->partition << " offset " << msg->offset); - rd_kafka_message_destroy(msg); + // Reached EOF while reading current batch, skip it. + LOG_TRACE(log, "EOF reached for partition " << message.get_partition() << " offset " << message.get_offset()); return nextImpl(); } - - if (msg->len && !msg->payload) - throw Exception("Logical error: nullptr message returned with non-zero length", ErrorCodes::LOGICAL_ERROR); + else if (auto err = message.get_error()) + { + LOG_ERROR(log, "Consumer error: " << err); + return false; + } ++read_messages; // Now we've received a new message. 
Check if we need to produce a delimiter - if (row_delimiter != '\0' && current != nullptr) + if (row_delimiter != '\0' && current) { BufferBase::set(&row_delimiter, 1, 0); - reset(); - current = msg; + current = std::move(message); current_pending = true; return true; } @@ -112,31 +104,21 @@ class ReadBufferFromKafkaConsumer : public ReadBuffer // Consume message and mark the topic/partition offset // The offsets will be committed in the readSuffix() method after the block is completed // If an exception is thrown before that would occur, the client will rejoin without committing offsets - reset(); - current = msg; - BufferBase::set(reinterpret_cast(current->payload), current->len, 0); + current = std::move(message); + + // XXX: very fishy place with const casting. + BufferBase::set(reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); return true; } - void reset() - { - if (current != nullptr) - { - rd_kafka_message_destroy(current); - current = nullptr; - } - } - public: - ReadBufferFromKafkaConsumer(rd_kafka_t * consumer_, Poco::Logger * log_, char row_delimiter_) + ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, char row_delimiter_) : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), row_delimiter(row_delimiter_) { if (row_delimiter != '\0') LOG_TRACE(log, "Row delimiter is: " << row_delimiter); } - ~ReadBufferFromKafkaConsumer() override { reset(); } - /// Commit messages read with this consumer void commit() { @@ -144,10 +126,7 @@ public: if (read_messages == 0) return; - auto err = rd_kafka_commit(consumer, nullptr, 1 /* async */); - if (err) - throw Exception("Failed to commit offsets: " + String(rd_kafka_err2str(err)), ErrorCodes::UNKNOWN_EXCEPTION); - + consumer->async_commit(); read_messages = 0; } }; @@ -211,7 +190,7 @@ public: if (consumer == nullptr) throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED); - read_buf = std::make_unique(consumer->stream, storage.log, storage.row_delimiter); + read_buf = std::make_unique(consumer, storage.log, storage.row_delimiter); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); } @@ -235,7 +214,7 @@ public: private: StorageKafka & storage; - StorageKafka::ConsumerPtr consumer; + ConsumerPtr consumer; Context context; size_t max_block_size; Block sample_block; @@ -247,7 +226,7 @@ private: bool hasClaimed() { return consumer != nullptr; } }; -static void loadFromConfig(struct rd_kafka_conf_s * conf, const AbstractConfiguration & config, const std::string & path) +static void loadFromConfig(cppkafka::Configuration & conf, const AbstractConfiguration & config, const std::string & path) { AbstractConfiguration::Keys keys; std::vector errstr(512); @@ -258,8 +237,7 @@ static void loadFromConfig(struct rd_kafka_conf_s * conf, const AbstractConfigur { const String key_path = path + "." 
+ key; const String key_name = boost::replace_all_copy(key, "_", "."); - if (rd_kafka_conf_set(conf, key_name.c_str(), config.getString(key_path).c_str(), errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception("Invalid Kafka setting " + key_path + " in config: " + String(errstr.data()), ErrorCodes::INVALID_CONFIG_PARAMETER); + conf.set(key_name, config.getString(key_path)); } } @@ -322,21 +300,8 @@ void StorageKafka::startup() { for (size_t i = 0; i < num_consumers; ++i) { - // Building configuration may throw, the consumer configuration must be destroyed in that case - auto consumer_conf = rd_kafka_conf_new(); - try - { - consumerConfiguration(consumer_conf); - } - catch (...) - { - rd_kafka_conf_destroy(consumer_conf); - throw; - } - // Create a consumer and subscribe to topics - // Note: consumer takes ownership of the configuration - auto consumer = std::make_shared(consumer_conf); + auto consumer = std::make_shared(createConsumerConfiguration()); consumer->subscribe(topics); // Make consumer available @@ -358,7 +323,7 @@ void StorageKafka::shutdown() for (size_t i = 0; i < num_created_consumers; ++i) { auto consumer = claimConsumer(); - consumer->close(); + // FIXME: not sure if really close consumers here, and if we really need to close them here. } LOG_TRACE(log, "Waiting for cleanup"); @@ -374,24 +339,20 @@ void StorageKafka::updateDependencies() } -void StorageKafka::consumerConfiguration(struct rd_kafka_conf_s * conf) +cppkafka::Configuration StorageKafka::createConsumerConfiguration() { - std::vector errstr(512); + cppkafka::Configuration conf; LOG_TRACE(log, "Setting brokers: " << brokers); - if (rd_kafka_conf_set(conf, "metadata.broker.list", brokers.c_str(), errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception(String(errstr.data()), ErrorCodes::INCORRECT_DATA); + conf.set("metadata.broker.list", brokers); LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); + conf.set("group.id", group); - if (rd_kafka_conf_set(conf, "group.id", group.c_str(), errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception(String(errstr.data()), ErrorCodes::INCORRECT_DATA); - - if (rd_kafka_conf_set(conf, "client.id", VERSION_FULL, errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception(String(errstr.data()), ErrorCodes::INCORRECT_DATA); + conf.set("client.id", VERSION_FULL); // We manually commit offsets after a stream successfully finished - rd_kafka_conf_set(conf, "enable.auto.commit", "false", nullptr, 0); + conf.set("enable.auto.commit", "false"); // Update consumer configuration from the configuration const auto & config = global_context.getConfigRef(); @@ -405,14 +366,16 @@ void StorageKafka::consumerConfiguration(struct rd_kafka_conf_s * conf) if (config.has(topic_config_key)) loadFromConfig(conf, config, topic_config_key); } + + return conf; } -StorageKafka::ConsumerPtr StorageKafka::claimConsumer() +ConsumerPtr StorageKafka::claimConsumer() { return tryClaimConsumer(-1L); } -StorageKafka::ConsumerPtr StorageKafka::tryClaimConsumer(long wait_ms) +ConsumerPtr StorageKafka::tryClaimConsumer(long wait_ms) { // Wait for the first free consumer if (wait_ms >= 0) @@ -430,7 +393,7 @@ StorageKafka::ConsumerPtr StorageKafka::tryClaimConsumer(long wait_ms) return consumer; } -void StorageKafka::pushConsumer(StorageKafka::ConsumerPtr consumer) +void StorageKafka::pushConsumer(ConsumerPtr consumer) { std::lock_guard lock(mutex); consumers.push_back(consumer); @@ -553,64 +516,6 @@ bool StorageKafka::streamToViews() } 
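+
+/// [Editorial sketch, not part of the original commit.] For orientation: with
+/// cppkafka the hand-written Consumer wrapper removed below is unnecessary;
+/// an equivalent consumer is built from a cppkafka::Configuration roughly as
+///
+///     cppkafka::Configuration conf;
+///     conf.set("metadata.broker.list", brokers);
+///     conf.set("group.id", group);
+///     conf.set("enable.auto.commit", "false");   // offsets are committed manually after each block
+///     auto consumer = std::make_shared<cppkafka::Consumer>(conf);
+///     consumer->subscribe(topics);               // topics is a std::vector<std::string>
+///
+/// mirroring createConsumerConfiguration() and startup() above.
+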
-StorageKafka::Consumer::Consumer(struct rd_kafka_conf_s * conf) -{ - std::vector errstr(512); - stream = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr.data(), errstr.size()); - if (stream == nullptr) - { - rd_kafka_conf_destroy(conf); - throw Exception("Failed to create consumer handle: " + String(errstr.data()), ErrorCodes::UNKNOWN_EXCEPTION); - } - - rd_kafka_poll_set_consumer(stream); -} - - -StorageKafka::Consumer::~Consumer() -{ - close(); -} - - -void StorageKafka::Consumer::subscribe(const Names & topics_to_subscribe) -{ - if (stream == nullptr) - throw Exception("Cannot subscribe to topics when consumer is closed", ErrorCodes::UNKNOWN_EXCEPTION); - - // Create a list of partitions - auto * topic_list = rd_kafka_topic_partition_list_new(topics_to_subscribe.size()); - for (const auto & topic : topics_to_subscribe) - rd_kafka_topic_partition_list_add(topic_list, topic.c_str(), RD_KAFKA_PARTITION_UA); - - // Subscribe to requested topics - auto err = rd_kafka_subscribe(stream, topic_list); - if (err) - { - rd_kafka_topic_partition_list_destroy(topic_list); - throw Exception("Failed to subscribe: " + String(rd_kafka_err2str(err)), ErrorCodes::UNKNOWN_EXCEPTION); - } - - rd_kafka_topic_partition_list_destroy(topic_list); -} - - -void StorageKafka::Consumer::unsubscribe() -{ - if (stream != nullptr) - rd_kafka_unsubscribe(stream); -} - -void StorageKafka::Consumer::close() -{ - if (stream != nullptr) - { - rd_kafka_consumer_close(stream); - rd_kafka_destroy(stream); - stream = nullptr; - } -} - void registerStorageKafka(StorageFactory & factory) { factory.registerStorage("Kafka", [](const StorageFactory::Arguments & args) diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index e7cce510166..d6b324e8a85 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -12,15 +12,13 @@ #include #include +#include #include -struct rd_kafka_s; -struct rd_kafka_conf_s; - namespace DB { -class StorageKafka; +using ConsumerPtr = std::shared_ptr; /** Implements a Kafka queue table engine that can be used as a persistent queue / buffer, * or as a basic building block for creating pipelines with a continuous insertion / ETL. @@ -55,22 +53,6 @@ public: void updateDependencies() override; private: - /// Each engine typically has one consumer (able to process 1..N partitions) - /// It is however possible to create multiple consumers per table, as long - /// as the total number of consumers is <= number of partitions. 
- struct Consumer - { - Consumer(struct rd_kafka_conf_s * conf); - ~Consumer(); - - void subscribe(const Names & topics); - void unsubscribe(); - void close(); - - struct rd_kafka_s * stream = nullptr; - }; - using ConsumerPtr = std::shared_ptr; - // Configuration and state String table_name; String database_name; @@ -102,7 +84,7 @@ private: BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; - void consumerConfiguration(struct rd_kafka_conf_s * conf); + cppkafka::Configuration createConsumerConfiguration(); ConsumerPtr claimConsumer(); ConsumerPtr tryClaimConsumer(long wait_ms); void pushConsumer(ConsumerPtr c); From 77daa519ff3659021ad50559f29b54e2b5f27127 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 28 Dec 2018 12:07:58 +0300 Subject: [PATCH 48/56] Update librdkafka to v1.0.0-RC5 --- .gitignore | 3 +++ contrib/cppkafka-cmake/CMakeLists.txt | 2 +- contrib/librdkafka | 2 +- contrib/librdkafka-cmake/CMakeLists.txt | 3 +++ contrib/librdkafka-cmake/config.h | 2 ++ 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9816f1cbb6c..3fa4a095a1b 100644 --- a/.gitignore +++ b/.gitignore @@ -251,3 +251,6 @@ website/package-lock.json # cquery cache /.cquery-cache + +# ccls cache +/.ccls-cache diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt index f62fa471172..9fb98e35b47 100644 --- a/contrib/cppkafka-cmake/CMakeLists.txt +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -25,7 +25,7 @@ set(SRCS add_library(cppkafka ${LINK_MODE} ${SRCS}) -target_link_libraries(cppkafka ${RDKAFKA_LIBRARY}) +target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY}) target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka) target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include) diff --git a/contrib/librdkafka b/contrib/librdkafka index 7478b5ef16a..363dcad5a23 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98 +Subproject commit 363dcad5a23dc29381cc626620e68ae418b3af19 diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 3b35634dabc..115c916e9f4 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -8,12 +8,14 @@ set(SRCS ${RDKAFKA_SOURCE_DIR}/rdcrc32.c ${RDKAFKA_SOURCE_DIR}/rdkafka.c ${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_background.c ${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c ${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c ${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c ${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c ${RDKAFKA_SOURCE_DIR}/rdkafka_event.c ${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_idempotence.c ${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c @@ -47,6 +49,7 @@ set(SRCS ${RDKAFKA_SOURCE_DIR}/rdvarint.c ${RDKAFKA_SOURCE_DIR}/snappy.c ${RDKAFKA_SOURCE_DIR}/tinycthread.c + ${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c ${RDKAFKA_SOURCE_DIR}/xxhash.c ${RDKAFKA_SOURCE_DIR}/lz4.c ${RDKAFKA_SOURCE_DIR}/lz4frame.c diff --git a/contrib/librdkafka-cmake/config.h b/contrib/librdkafka-cmake/config.h index 68e93a10ff1..f94c6bfc630 100644 --- a/contrib/librdkafka-cmake/config.h +++ b/contrib/librdkafka-cmake/config.h @@ -71,4 +71,6 @@ #define HAVE_PTHREAD_SETNAME_GNU 1 // python //#define HAVE_PYTHON 1 +// C11 threads +#define 
WITH_C11THREADS 1
 #endif /* _CONFIG_H_ */

From 07f8ef4f4c601cdd3dadf653cb29c137eb82f75a Mon Sep 17 00:00:00 2001
From: Ivan Lezhankin
Date: Fri, 11 Jan 2019 16:36:30 +0300
Subject: [PATCH 49/56] Use C11 threads only if available

---
 contrib/librdkafka-cmake/config.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/contrib/librdkafka-cmake/config.h b/contrib/librdkafka-cmake/config.h
index f94c6bfc630..2ffc5a497ae 100644
--- a/contrib/librdkafka-cmake/config.h
+++ b/contrib/librdkafka-cmake/config.h
@@ -1,4 +1,4 @@
-// Automatically generated by ./configure
+// Automatically generated by ./configure
 #ifndef _CONFIG_H_
 #define _CONFIG_H_
 #define ARCH "x86_64"
@@ -72,5 +72,7 @@
 // python
 //#define HAVE_PYTHON 1
 // C11 threads
-#define WITH_C11THREADS 1
+#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
+# define WITH_C11THREADS 1
+#endif
 #endif /* _CONFIG_H_ */

From 5024d1b7f762b72e7d5ca3370c4526ec204dc414 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 14 Jan 2019 14:52:12 +0300
Subject: [PATCH 50/56] Fix macos build

---
 dbms/src/DataTypes/DataTypeLowCardinality.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp
index b823a9257ad..e73deaae2ca 100644
--- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp
+++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp
@@ -713,7 +713,7 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
         readIntBinary(low_cardinality_state->num_pending_rows, *indexes_stream);
     }

-    size_t num_rows_to_read = std::min(limit, low_cardinality_state->num_pending_rows);
+    size_t num_rows_to_read = std::min<UInt64>(limit, low_cardinality_state->num_pending_rows);
     readIndexes(num_rows_to_read);
     limit -= num_rows_to_read;
     low_cardinality_state->num_pending_rows -= num_rows_to_read;

From c0b72492c09164f23a68d9ad7c38dab28e6d3a63 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 14 Jan 2019 16:33:14 +0300
Subject: [PATCH 51/56] Fix macos build

---
 .../Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp
index 1c847eb0e11..9dce4edb239 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp
@@ -91,7 +91,7 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart()
             UInt64 rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes);
             if (!rows_to_read)
                 return rows_to_read;
-            rows_to_read = std::max(index_granularity, rows_to_read);
+            rows_to_read = std::max<UInt64>(index_granularity, rows_to_read);

             if (current_preferred_max_column_in_block_size_bytes)
             {

From af0b875f57aa65d5095563a218da85aa457a37b0 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 14 Jan 2019 17:15:39 +0300
Subject: [PATCH 52/56] Fix macos build

---
 .../src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 2f1ee2a2943..d0b5636f3dc 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -199,7 +199,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs()
     min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate);

     /// We will not touch the last `min_replicated_logs_to_keep` records.
-    entries.erase(entries.end() - std::min(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
+    entries.erase(entries.end() - std::min<UInt64>(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end());
     /// We will not touch records that are no less than `min_saved_log_pointer`.
     entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end());

From 72df7ceee6c126086b081a054b06b02569a154c2 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 14 Jan 2019 18:18:56 +0300
Subject: [PATCH 53/56] Macos build fix

---
 dbms/src/Formats/PrettyBlockOutputStream.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Formats/PrettyBlockOutputStream.cpp b/dbms/src/Formats/PrettyBlockOutputStream.cpp
index 16df780993e..fe102ea5739 100644
--- a/dbms/src/Formats/PrettyBlockOutputStream.cpp
+++ b/dbms/src/Formats/PrettyBlockOutputStream.cpp
@@ -61,7 +61,7 @@ void PrettyBlockOutputStream::calculateWidths(
                 elem.type->serializeText(*elem.column, j, out, format_settings);
             }

-            widths[i][j] = std::min(format_settings.pretty.max_column_pad_width,
+            widths[i][j] = std::min<UInt64>(format_settings.pretty.max_column_pad_width,
                 UTF8::computeWidth(reinterpret_cast<const UInt8 *>(serialized_value.data()), serialized_value.size(), prefix));
             max_widths[i] = std::max(max_widths[i], widths[i][j]);
         }
@@ -69,7 +69,7 @@
         /// And also calculate widths for names of columns.
         {
             // name string doesn't contain Tab, no need to pass `prefix`
-            name_widths[i] = std::min(format_settings.pretty.max_column_pad_width,
+            name_widths[i] = std::min<UInt64>(format_settings.pretty.max_column_pad_width,
                 UTF8::computeWidth(reinterpret_cast<const UInt8 *>(elem.name.data()), elem.name.size()));
             max_widths[i] = std::max(max_widths[i], name_widths[i]);
         }

From 4b6af5788a6efe703f3402615bc0e6b1b6250417 Mon Sep 17 00:00:00 2001
From: proller
Date: Mon, 14 Jan 2019 19:30:36 +0300
Subject: [PATCH 54/56] Fix macos build

---
 .../src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index d0b5636f3dc..d6ae21fc8be 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -295,7 +295,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks()
     /// Virtual node, all nodes that are "greater" than this one will be deleted
     NodeWithStat block_threshold{{}, time_threshold};

-    size_t current_deduplication_window = std::min(timed_blocks.size(), storage.data.settings.replicated_deduplication_window.value);
+    size_t current_deduplication_window = std::min<UInt64>(timed_blocks.size(), storage.data.settings.replicated_deduplication_window.value);
     auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window;
     auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime);
     auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold);

From 15eee83be7b05b2629c599322b0c0f2c24b615dc Mon
Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 14 Jan 2019 16:06:14 +0300 Subject: [PATCH 55/56] Use internal cppkafka in unbundled configuration --- cmake/find_rdkafka.cmake | 1 + contrib/CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index b0a0a98b382..1c93b99a344 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -20,6 +20,7 @@ if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) if (USE_STATIC_LIBRARIES AND NOT OS_FREEBSD) find_library (SASL2_LIBRARY sasl2) endif () + set (CPPKAFKA_LIBRARY cppkafka) # TODO: try to use unbundled version. endif () if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 25ad30e02eb..8504e5facdb 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -121,11 +121,14 @@ endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) add_subdirectory (librdkafka-cmake) - add_subdirectory (cppkafka-cmake) target_include_directories(rdkafka BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) endif () +if (USE_RDKAFKA) + add_subdirectory (cppkafka-cmake) +endif() + if (ENABLE_ODBC AND USE_INTERNAL_ODBC_LIBRARY) add_subdirectory (unixodbc-cmake) endif () From dbb88e8cb4075d9bed616d36c9bb3122e0068d8b Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 14 Jan 2019 21:15:04 +0300 Subject: [PATCH 56/56] helpers for ASTIdentifier (hide semantic and casts) --- .../DataTypes/DataTypeAggregateFunction.cpp | 4 +- dbms/src/Databases/DatabaseOrdinary.cpp | 1 + dbms/src/Databases/DatabasesCommon.cpp | 1 + dbms/src/Interpreters/AnalyzedJoin.cpp | 4 +- .../Interpreters/ArrayJoinedColumnsVisitor.h | 2 +- dbms/src/Interpreters/ColumnNamesContext.cpp | 2 +- dbms/src/Interpreters/ColumnNamesContext.h | 4 +- .../DatabaseAndTableWithAlias.cpp | 44 +++++-------- .../Interpreters/DatabaseAndTableWithAlias.h | 1 + dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 +- dbms/src/Interpreters/ExternalTablesVisitor.h | 6 +- .../InJoinSubqueriesPreprocessor.cpp | 8 +-- .../Interpreters/InterpreterDescribeQuery.cpp | 8 +-- .../JoinToSubqueryTransformVisitor.cpp | 4 +- dbms/src/Interpreters/QueryNormalizer.cpp | 22 +++---- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 15 +++-- .../TranslateQualifiedNamesVisitor.cpp | 2 +- dbms/src/Interpreters/executeQuery.cpp | 2 +- dbms/src/Interpreters/loadMetadata.cpp | 1 + dbms/src/Parsers/ASTIdentifier.cpp | 66 +++++++++++++++++++ dbms/src/Parsers/ASTIdentifier.h | 38 +++++++++-- dbms/src/Parsers/ExpressionElementParsers.cpp | 17 +++-- dbms/src/Parsers/ExpressionListParsers.cpp | 3 +- dbms/src/Parsers/ParserAlterQuery.cpp | 3 +- dbms/src/Parsers/ParserCheckQuery.cpp | 8 +-- dbms/src/Parsers/ParserCreateQuery.cpp | 29 ++++---- dbms/src/Parsers/ParserCreateQuery.h | 5 +- dbms/src/Parsers/ParserDropQuery.cpp | 10 ++- dbms/src/Parsers/ParserInsertQuery.cpp | 11 +--- dbms/src/Parsers/ParserOptimizeQuery.cpp | 8 +-- dbms/src/Parsers/ParserQueryWithOutput.cpp | 2 +- dbms/src/Parsers/ParserRenameQuery.cpp | 7 +- dbms/src/Parsers/ParserSetQuery.cpp | 2 +- dbms/src/Parsers/ParserShowTablesQuery.cpp | 3 +- .../Parsers/ParserTablePropertiesQuery.cpp | 6 +- dbms/src/Parsers/ParserUseQuery.cpp | 5 +- .../src/Parsers/parseDatabaseAndTableName.cpp | 7 +- .../parseIdentifierOrStringLiteral.cpp | 2 +- dbms/src/Storages/AlterCommands.cpp | 7 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 27 ++++---- .../MergeTree/MergeTreeWhereOptimizer.h | 2 +- 
.../MergeTree/registerStorageMergeTree.cpp | 24 ++----- dbms/src/Storages/PartitionCommands.cpp | 3 +- dbms/src/Storages/StorageFile.cpp | 10 +-- dbms/src/Storages/StorageHDFS.cpp | 2 - dbms/src/Storages/StorageJoin.cpp | 19 +++--- dbms/src/Storages/VirtualColumnUtils.cpp | 8 +-- .../transformQueryForExternalDatabase.cpp | 2 +- .../TableFunctions/TableFunctionRemote.cpp | 9 +-- 49 files changed, 253 insertions(+), 226 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 04786922966..7a1b163f3b6 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -317,9 +317,9 @@ static DataTypePtr create(const ASTPtr & arguments) params_row[i] = lit->value; } } - else if (const ASTIdentifier * identifier = typeid_cast(arguments->children[0].get())) + else if (auto opt_name = getIdentifierName(arguments->children[0])) { - function_name = identifier->name; + function_name = *opt_name; } else if (typeid_cast(arguments->children[0].get())) { diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index cb1c7587080..958d65b7128 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index e64851bf470..3189701d13c 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index c39ea9c9495..c3ea45bf817 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include @@ -118,8 +117,7 @@ NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpressio } else if (table_expression.database_and_table_name) { - const auto & identifier = static_cast(*table_expression.database_and_table_name); - DatabaseAndTableWithAlias database_table(identifier); + DatabaseAndTableWithAlias database_table(table_expression.database_and_table_name); const auto & table = context.getTable(database_table.database, database_table.table); names_and_type_list = table->getSampleBlockNonMaterialized().getNamesAndTypesList(); } diff --git a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h index de75f4622ef..15985eb7ba2 100644 --- a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -58,7 +58,7 @@ private: NameToNameMap & array_join_alias_to_name = data.array_join_alias_to_name; NameToNameMap & array_join_result_to_source = data.array_join_result_to_source; - if (!node.general()) + if (!getColumnIdentifierName(node)) return; auto splitted = Nested::splitName(node.name); /// ParsedParams, Key1 diff --git a/dbms/src/Interpreters/ColumnNamesContext.cpp b/dbms/src/Interpreters/ColumnNamesContext.cpp index 1d17106d3e7..246b5f5306e 100644 --- a/dbms/src/Interpreters/ColumnNamesContext.cpp +++ b/dbms/src/Interpreters/ColumnNamesContext.cpp @@ -31,7 +31,7 @@ bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public) void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node, bool is_public) { - if (!node.general()) + if 
(!getColumnIdentifierName(node)) return; required_names.insert(node.name); diff --git a/dbms/src/Interpreters/ColumnNamesContext.h b/dbms/src/Interpreters/ColumnNamesContext.h index a605903580f..0827463692c 100644 --- a/dbms/src/Interpreters/ColumnNamesContext.h +++ b/dbms/src/Interpreters/ColumnNamesContext.h @@ -39,9 +39,7 @@ struct ColumnNamesContext std::optional name() const { if (expr) - if (auto * node = expr->database_and_table_name.get()) - if (auto * identifier = typeid_cast(node)) - return identifier->name; + return getIdentifierName(expr->database_and_table_name); return {}; } diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp index df430ee0fbd..154484ab5b6 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -54,8 +54,6 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifie { size_t num_qualifiers_to_strip = 0; - auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; - /// It is compound identifier if (!identifier.children.empty()) { @@ -64,16 +62,16 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifie /// database.table.column if (num_components >= 3 && !names.database.empty() - && get_identifier_name(identifier.children[0]) == names.database - && get_identifier_name(identifier.children[1]) == names.table) + && *getIdentifierName(identifier.children[0]) == names.database + && *getIdentifierName(identifier.children[1]) == names.table) { num_qualifiers_to_strip = 2; } /// table.column or alias.column. If num_components > 2, it is like table.nested.column. if (num_components >= 2 - && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) - || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) + && ((!names.table.empty() && *getIdentifierName(identifier.children[0]) == names.table) + || (!names.alias.empty() && *getIdentifierName(identifier.children[0]) == names.alias))) { num_qualifiers_to_strip = 1; } @@ -94,26 +92,24 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident if (identifier.children.size() != 2) throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - const ASTIdentifier * db_identifier = typeid_cast(identifier.children[0].get()); - const ASTIdentifier * table_identifier = typeid_cast(identifier.children[1].get()); - if (!db_identifier || !table_identifier) - throw Exception("Logical error: identifiers expected", ErrorCodes::LOGICAL_ERROR); - - database = db_identifier->name; - table = table_identifier->name; + getIdentifierName(identifier.children[0], database); + getIdentifierName(identifier.children[1], table); } } +DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const String & current_database) +{ + const auto * identifier = typeid_cast(node.get()); + if (!identifier) + throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); + + *this = DatabaseAndTableWithAlias(*identifier, current_database); +} + DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database) { if (table_expression.database_and_table_name) - { - const auto * identifier = typeid_cast(table_expression.database_and_table_name.get()); - if (!identifier) - throw Exception("Logical error: 
identifier expected", ErrorCodes::LOGICAL_ERROR); - - *this = DatabaseAndTableWithAlias(*identifier, current_database); - } + *this = DatabaseAndTableWithAlias(table_expression.database_and_table_name, current_database); else if (table_expression.table_function) alias = table_expression.table_function->tryGetAlias(); else if (table_expression.subquery) @@ -207,14 +203,10 @@ std::optional getDatabaseAndTable(const ASTSelectQuer return {}; ASTPtr database_and_table_name = table_expression->database_and_table_name; - if (!database_and_table_name) + if (!database_and_table_name || !isIdentifier(database_and_table_name)) return {}; - const ASTIdentifier * identifier = typeid_cast(database_and_table_name.get()); - if (!identifier) - return {}; - - return *identifier; + return DatabaseAndTableWithAlias(database_and_table_name); } ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number) diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 8076deb5ee9..601bde82e2f 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -24,6 +24,7 @@ struct DatabaseAndTableWithAlias String alias; DatabaseAndTableWithAlias() = default; + DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 52a6c8a5e17..78b2c2cfffb 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -548,8 +548,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty /// TODO This syntax does not support specifying a database name. 
if (table_to_join.database_and_table_name) { - const auto & identifier = static_cast(*table_to_join.database_and_table_name); - DatabaseAndTableWithAlias database_table(identifier); + DatabaseAndTableWithAlias database_table(table_to_join.database_and_table_name); StoragePtr table = context.tryGetTable(database_table.database, database_table.table); if (table) diff --git a/dbms/src/Interpreters/ExternalTablesVisitor.h b/dbms/src/Interpreters/ExternalTablesVisitor.h index ffc51bf7890..d8b177b1ed3 100644 --- a/dbms/src/Interpreters/ExternalTablesVisitor.h +++ b/dbms/src/Interpreters/ExternalTablesVisitor.h @@ -33,9 +33,9 @@ public: private: static std::vector visit(const ASTIdentifier & node, ASTPtr &, Data & data) { - if (node.special()) - if (StoragePtr external_storage = data.context.tryGetExternalTable(node.name)) - data.external_tables[node.name] = external_storage; + if (auto opt_name = getTableIdentifierName(node)) + if (StoragePtr external_storage = data.context.tryGetExternalTable(*opt_name)) + data.external_tables[*opt_name] = external_storage; return {}; } }; diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 58e28f9bfc6..0b118f98057 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include @@ -82,12 +81,7 @@ void forEachTable(IAST * node, F && f) StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & context) { - const ASTIdentifier * id = typeid_cast(database_and_table.get()); - if (!id) - throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); - - DatabaseAndTableWithAlias db_and_table(*id); - + DatabaseAndTableWithAlias db_and_table(database_and_table); return context.tryGetTable(db_and_table.database, db_and_table.table); } diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index 9aec49f27f1..9e7fbec6217 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -95,13 +95,11 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() auto database_ptr = identifier->children[0]; auto table_ptr = identifier->children[1]; - if (database_ptr) - database_name = typeid_cast(*database_ptr).name; - if (table_ptr) - table_name = typeid_cast(*table_ptr).name; + getIdentifierName(database_ptr, database_name); + getIdentifierName(table_ptr, table_name); } else - table_name = typeid_cast(*identifier).name; + getIdentifierName(identifier, table_name); table = context.getTable(database_name, table_name); } diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 80c670c8794..a52df54e626 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -83,8 +83,8 @@ static void appendTableNameAndAlias(std::vector & hidden, const ASTPtr & if (!alias.empty()) hidden.push_back(alias); - if (auto * identifier = typeid_cast(table_expression->children[0].get())) - hidden.push_back(identifier->name); + if (auto opt_name = getIdentifierName(table_expression->children[0])) + hidden.push_back(*opt_name); else if (alias.empty()) throw Exception("Expected Identifier or subquery with alias", ErrorCodes::LOGICAL_ERROR); } diff --git 
a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 328b3f6bf6a..e422d247a9c 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -68,9 +68,12 @@ void QueryNormalizer::visit(ASTFunction & node, const ASTPtr &, Data & data) /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. if (functionIsInOrGlobalInOperator(func_name)) - if (ASTIdentifier * right = typeid_cast(func_arguments->children.at(1).get())) - if (!aliases.count(right->name)) - right->setSpecial(); + { + auto & ast = func_arguments->children.at(1); + if (auto opt_name = getIdentifierName(ast)) + if (!aliases.count(*opt_name)) + setIdentifierSpecial(ast); + } /// Special cases for count function. String func_name_lowercase = Poco::toLower(func_name); @@ -97,7 +100,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) auto & current_asts = data.current_asts; String & current_alias = data.current_alias; - if (!node.general()) + if (!getColumnIdentifierName(node)) return; /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). @@ -114,9 +117,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) if (!my_alias.empty() && my_alias != alias_node->getAliasOrColumnName()) { /// Avoid infinite recursion here - auto replace_to_identifier = typeid_cast(alias_node.get()); - bool is_cycle = replace_to_identifier && replace_to_identifier->general() - && replace_to_identifier->name == node.name; + auto opt_name = getColumnIdentifierName(alias_node); + bool is_cycle = opt_name && *opt_name == node.name; if (!is_cycle) { @@ -195,10 +197,8 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & { if (node.table_expression) { - auto & database_and_table_name = static_cast(*node.table_expression).database_and_table_name; - if (database_and_table_name) - if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) - right->setSpecial(); + auto & expr = static_cast(*node.table_expression); + setIdentifierSpecial(expr.database_and_table_name); } } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 5b40200c019..81bcca0bbc6 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -443,7 +443,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const const String nested_table_name = ast->getColumnName(); const String nested_table_alias = ast->getAliasOrColumnName(); - if (nested_table_alias == nested_table_name && !typeid_cast(ast.get())) + if (nested_table_alias == nested_table_name && !isIdentifier(ast)) throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED); @@ -471,7 +471,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const String result_name = expr->getAliasOrColumnName(); /// This is an array. 
- if (!typeid_cast(expr.get()) || source_columns_set.count(source_name)) + if (!isIdentifier(expr) || source_columns_set.count(source_name)) { result.array_join_result_to_source[result_name] = source_name; } @@ -528,10 +528,10 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS std::function get_table_belonging; get_table_belonging = [&](const ASTPtr & ast) -> TableBelonging { - auto * identifier = typeid_cast(ast.get()); - if (identifier) + if (getColumnIdentifierName(ast)) { - if (identifier->general()) + auto * identifier = typeid_cast(ast.get()); + { auto left_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, left_source_names); auto right_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, right_source_names); @@ -567,9 +567,10 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS std::function translate_qualified_names; translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table) { - if (auto * identifier = typeid_cast(ast.get())) + if (getColumnIdentifierName(ast)) { - if (identifier->general()) + auto * identifier = typeid_cast(ast.get()); + { auto num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, source_names); stripIdentifier(ast, num_components); diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 6ceb0cfe524..03726995f17 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -55,7 +55,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier const NameSet & source_columns = data.source_columns; const std::vector & tables = data.tables; - if (identifier.general()) + if (getColumnIdentifierName(identifier)) { /// Select first table name with max number of qualifiers which can be stripped. size_t max_num_qualifiers_to_strip = 0; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 75fd09f5676..3911e437fa6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -499,7 +499,7 @@ void executeQuery( } String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) - ? typeid_cast(*ast_query_with_output->format).name + ? 
*getIdentifierName(ast_query_with_output->format)
        : context.getDefaultFormat();

     BlockOutputStreamPtr out = context.getOutputFormat(format_name, *out_buf, streams.in->getHeader());
diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp
index 38e8407082d..2eae6ba4c29 100644
--- a/dbms/src/Interpreters/loadMetadata.cpp
+++ b/dbms/src/Interpreters/loadMetadata.cpp
@@ -22,6 +22,7 @@
 #include
 #include
+#include <Parsers/ASTIdentifier.h>

 namespace DB
diff --git a/dbms/src/Parsers/ASTIdentifier.cpp b/dbms/src/Parsers/ASTIdentifier.cpp
index efe796c3018..b463a65a84a 100644
--- a/dbms/src/Parsers/ASTIdentifier.cpp
+++ b/dbms/src/Parsers/ASTIdentifier.cpp
@@ -1,4 +1,5 @@
 #include
+#include <Common/typeid_cast.h>

 #include
 #include
@@ -38,4 +39,69 @@ void ASTIdentifier::appendColumnNameImpl(WriteBuffer & ostr) const
     writeString(name, ostr);
 }

+bool isIdentifier(const IAST * const ast)
+{
+    if (ast)
+        return typeid_cast<const ASTIdentifier *>(ast);
+    return false;
+}
+
+std::optional<String> getIdentifierName(const IAST * const ast)
+{
+    if (ast)
+        if (auto node = typeid_cast<const ASTIdentifier *>(ast))
+            return node->name;
+    return {};
+}
+
+bool getIdentifierName(const ASTPtr & ast, String & name)
+{
+    if (ast)
+        if (auto node = typeid_cast<const ASTIdentifier *>(ast.get()))
+        {
+            name = node->name;
+            return true;
+        }
+    return false;
+}
+
+std::optional<String> getColumnIdentifierName(const ASTIdentifier & node)
+{
+    if (!node.special())
+        return node.name;
+    return {};
+}
+
+std::optional<String> getColumnIdentifierName(const ASTPtr & ast)
+{
+    if (ast)
+        if (auto id = typeid_cast<const ASTIdentifier *>(ast.get()))
+            if (!id->special())
+                return id->name;
+    return {};
+}
+
+std::optional<String> getTableIdentifierName(const ASTIdentifier & node)
+{
+    if (node.special())
+        return node.name;
+    return {};
+}
+
+std::optional<String> getTableIdentifierName(const ASTPtr & ast)
+{
+    if (ast)
+        if (auto id = typeid_cast<const ASTIdentifier *>(ast.get()))
+            if (id->special())
+                return id->name;
+    return {};
+}
+
+void setIdentifierSpecial(ASTPtr & ast)
+{
+    if (ast)
+        if (ASTIdentifier * id = typeid_cast<ASTIdentifier *>(ast.get()))
+            id->setSpecial();
+}
+
 }
diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h
index b8c56727e17..8ae65065e1d 100644
--- a/dbms/src/Parsers/ASTIdentifier.h
+++ b/dbms/src/Parsers/ASTIdentifier.h
@@ -1,13 +1,14 @@
 #pragma once

+#include <optional>
+
 #include

 namespace DB
 {

-/** Identifier (column or alias)
-  */
+/// Identifier (column, table or alias)
 class ASTIdentifier : public ASTWithAlias
 {
     enum Kind /// TODO This is semantic, not syntax. Remove it.
@@ -33,10 +34,6 @@ public:
         set.insert(name);
     }

-    void setSpecial() { kind = Special; }
-    bool general() const { return kind == General; }
-    bool special() const { return kind == Special; }
-
     static std::shared_ptr<ASTIdentifier> createSpecial(const String & name_)
     {
         return std::make_shared<ASTIdentifier>(name_, ASTIdentifier::Special);
     }
@@ -48,6 +45,35 @@ protected:

 private:
     Kind kind;
+
+    void setSpecial() { kind = Special; }
+    bool special() const { return kind == Special; }
+
+    friend void setIdentifierSpecial(ASTPtr &);
+    friend std::optional<String> getColumnIdentifierName(const ASTIdentifier & node);
+    friend std::optional<String> getColumnIdentifierName(const ASTPtr & ast);
+    friend std::optional<String> getTableIdentifierName(const ASTIdentifier & node);
+    friend std::optional<String> getTableIdentifierName(const ASTPtr & ast);
 };
+
+/// ASTIdentifier Helpers: hide casts and semantic.
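+///
+/// [Editorial usage sketch, not part of the original commit.] Call sites assign
+/// through the bool overload instead of spelling the cast out, e.g.
+///
+///     getIdentifierName(database, query->database);  // copies the name if the node is an identifier
+///     getIdentifierName(table, query->table);
+///
+/// replacing the old `typeid_cast<ASTIdentifier &>(*table).name` pattern; the
+/// optional-returning overloads support `if (auto opt_name = getIdentifierName(ast))`.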
+
+bool isIdentifier(const IAST * const ast);
+inline bool isIdentifier(const ASTPtr & ast) { return isIdentifier(ast.get()); }
+
+std::optional<String> getIdentifierName(const IAST * const ast);
+inline std::optional<String> getIdentifierName(const ASTPtr & ast) { return getIdentifierName(ast.get()); }
+bool getIdentifierName(const ASTPtr & ast, String & name);
+
+/// @returns name for column identifiers
+std::optional<String> getColumnIdentifierName(const ASTIdentifier & node);
+std::optional<String> getColumnIdentifierName(const ASTPtr & ast);
+
+/// @returns name for 'not a column' identifiers
+std::optional<String> getTableIdentifierName(const ASTIdentifier & node);
+std::optional<String> getTableIdentifierName(const ASTPtr & ast);
+
+void setIdentifierSpecial(ASTPtr & ast);
+
 }
diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp
index 89081da9211..9ba848dbe1a 100644
--- a/dbms/src/Parsers/ExpressionElementParsers.cpp
+++ b/dbms/src/Parsers/ExpressionElementParsers.cpp
@@ -5,6 +5,7 @@
 #include
 #include
+#include
 #include
 #include
@@ -173,7 +174,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex
     {
         if (!name.empty())
             name += '.';
-        name += static_cast<ASTIdentifier &>(*child.get()).name;
+        name += *getIdentifierName(child);
     }

     node = std::make_shared<ASTIdentifier>(name);
@@ -222,7 +223,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
           * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number,
           * and the query silently returns an unexpected result.
           */
-        if (typeid_cast<ASTIdentifier &>(*identifier).name == "toDate"
+        if (*getIdentifierName(identifier) == "toDate"
             && contents_end - contents_begin == strlen("2014-01-01")
             && contents_begin[0] >= '2' && contents_begin[0] <= '3'
             && contents_begin[1] >= '0' && contents_begin[1] <= '9'
@@ -264,7 +265,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     }

     auto function_node = std::make_shared<ASTFunction>();
-    function_node->name = typeid_cast<ASTIdentifier &>(*identifier).name;
+    getIdentifierName(identifier, function_node->name);

     /// func(DISTINCT ...) is equivalent to funcDistinct(...)
     if (has_distinct_modifier)
@@ -1157,7 +1158,7 @@ bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
      * and in the query "SELECT x FRO FROM t", the word FRO was considered an alias.
*/ - const String & name = static_cast(*node.get()).name; + const String name = *getIdentifierName(node); for (const char ** keyword = restricted_keywords; *keyword != nullptr; ++keyword) if (0 == strcasecmp(name.data(), *keyword)) @@ -1249,18 +1250,16 @@ bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & exp */ bool allow_alias_without_as_keyword_now = allow_alias_without_as_keyword; if (allow_alias_without_as_keyword) - if (const ASTIdentifier * id = typeid_cast(node.get())) - if (0 == strcasecmp(id->name.data(), "FROM")) + if (auto opt_id = getIdentifierName(node)) + if (0 == strcasecmp(opt_id->data(), "FROM")) allow_alias_without_as_keyword_now = false; ASTPtr alias_node; if (ParserAlias(allow_alias_without_as_keyword_now).parse(pos, alias_node, expected)) { - String alias_name = typeid_cast(*alias_node).name; - if (ASTWithAlias * ast_with_alias = dynamic_cast(node.get())) { - ast_with_alias->alias = alias_name; + getIdentifierName(alias_node, ast_with_alias->alias); ast_with_alias->prefer_alias_to_column_name = prefer_alias_to_column_name; } else diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index de6fc2dc129..9f17b3935f5 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -1,10 +1,9 @@ +#include #include #include #include - #include #include - #include #include diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index b17467ed365..91aa211065e 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -312,7 +313,7 @@ bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!p_expression.parse(pos, assignment->expression, expected)) return false; - assignment->column_name = typeid_cast(*column).name; + getIdentifierName(column, assignment->column_name); if (assignment->expression) assignment->children.push_back(assignment->expression); diff --git a/dbms/src/Parsers/ParserCheckQuery.cpp b/dbms/src/Parsers/ParserCheckQuery.cpp index d9fd46694d6..cd25e60b887 100644 --- a/dbms/src/Parsers/ParserCheckQuery.cpp +++ b/dbms/src/Parsers/ParserCheckQuery.cpp @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { @@ -31,15 +29,15 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto query = std::make_shared(); - query->database = typeid_cast(*database).name; - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); node = query; } else { table = database; auto query = std::make_shared(); - query->table = typeid_cast(*table).name; + getIdentifierName(table, query->table); node = query; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index f44b7d35eb6..79767218d22 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -35,7 +35,7 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto func = std::make_shared(); - func->name = typeid_cast(*name).name; + getIdentifierName(name, func->name); func->arguments = columns; func->children.push_back(columns); node = func; @@ -70,7 +70,7 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, if (non_parametric.parse(pos, ident, expected)) { auto func = std::make_shared(); - 
diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp
index f44b7d35eb6..79767218d22 100644
--- a/dbms/src/Parsers/ParserCreateQuery.cpp
+++ b/dbms/src/Parsers/ParserCreateQuery.cpp
@@ -35,7 +35,7 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         return false;

     auto func = std::make_shared<ASTFunction>();
-    func->name = typeid_cast<const ASTIdentifier &>(*name).name;
+    getIdentifierName(name, func->name);
     func->arguments = columns;
     func->children.push_back(columns);
     node = func;
@@ -70,7 +70,7 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node,
     if (non_parametric.parse(pos, ident, expected))
     {
         auto func = std::make_shared<ASTFunction>();
-        func->name = typeid_cast<const ASTIdentifier &>(*ident).name;
+        getIdentifierName(ident, func->name);
         node = func;
         return true;
     }
@@ -257,10 +257,8 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
         query->if_not_exists = if_not_exists;
         query->cluster = cluster_str;

-        if (database)
-            query->database = typeid_cast<const ASTIdentifier &>(*database).name;
-        if (table)
-            query->table = typeid_cast<const ASTIdentifier &>(*table).name;
+        getIdentifierName(database, query->database);
+        getIdentifierName(table, query->table);

         return true;
     }
@@ -405,23 +403,18 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     query->is_populate = is_populate;
     query->temporary = is_temporary;

-    if (database)
-        query->database = typeid_cast<const ASTIdentifier &>(*database).name;
-    if (table)
-        query->table = typeid_cast<const ASTIdentifier &>(*table).name;
+    getIdentifierName(database, query->database);
+    getIdentifierName(table, query->table);
     query->cluster = cluster_str;

-    if (to_database)
-        query->to_database = typeid_cast<const ASTIdentifier &>(*to_database).name;
-    if (to_table)
-        query->to_table = typeid_cast<const ASTIdentifier &>(*to_table).name;
+    getIdentifierName(to_database, query->to_database);
+    getIdentifierName(to_table, query->to_table);

     query->set(query->columns, columns);
     query->set(query->storage, storage);

-    if (as_database)
-        query->as_database = typeid_cast<const ASTIdentifier &>(*as_database).name;
-    if (as_table)
-        query->as_table = typeid_cast<const ASTIdentifier &>(*as_table).name;
+
+    getIdentifierName(as_database, query->as_database);
+    getIdentifierName(as_table, query->as_table);

     query->set(query->select, select);

     return true;
diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h
index 27b4cd21fbd..1efe4e41bde 100644
--- a/dbms/src/Parsers/ParserCreateQuery.h
+++ b/dbms/src/Parsers/ParserCreateQuery.h
@@ -8,7 +8,6 @@
 #include
 #include
 #include
-#include
 #include
@@ -74,7 +73,7 @@ bool IParserNameTypePair<NameParser>::parseImpl(Pos & pos, ASTPtr & node, Expect
         && type_parser.parse(pos, type, expected))
     {
         auto name_type_pair = std::make_shared<ASTNameTypePair>();
-        name_type_pair->name = typeid_cast<const ASTIdentifier &>(*name).name;
+        getIdentifierName(name, name_type_pair->name);
         name_type_pair->type = type;
         name_type_pair->children.push_back(type);
         node = name_type_pair;
@@ -181,7 +180,7 @@ bool IParserColumnDeclaration<NameParser>::parseImpl(Pos & pos, ASTPtr & node, E
     const auto column_declaration = std::make_shared<ASTColumnDeclaration>();
     node = column_declaration;
-    column_declaration->name = typeid_cast<const ASTIdentifier &>(*name).name;
+    getIdentifierName(name, column_declaration->name);

     if (type)
     {
diff --git a/dbms/src/Parsers/ParserDropQuery.cpp b/dbms/src/Parsers/ParserDropQuery.cpp
index 5ff69291e2f..c3a97a222d2 100644
--- a/dbms/src/Parsers/ParserDropQuery.cpp
+++ b/dbms/src/Parsers/ParserDropQuery.cpp
@@ -4,8 +4,6 @@
 #include
 #include
-#include
-
 namespace DB
 {
@@ -117,10 +115,10 @@ bool ParserDropQuery::parseDropQuery(Pos & pos, ASTPtr & node, Expected & expect
     query->kind = ASTDropQuery::Kind::Drop;
     query->if_exists = if_exists;
     query->temporary = temporary;
-    if (database)
-        query->database = typeid_cast<const ASTIdentifier &>(*database).name;
-    if (table)
-        query->table = typeid_cast<const ASTIdentifier &>(*table).name;
+
+    getIdentifierName(database, query->database);
+    getIdentifierName(table, query->table);
+
     query->cluster = cluster_str;

     return true;
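The hunks above repeat one pattern: the bool overload is a no-op for a null ASTPtr, which is what lets the old if (database) / if (table) guards disappear. A small before/after sketch (variable names are illustrative):

    ASTPtr database;    /// may legitimately stay null, e.g. "CREATE TABLE t ..."
    String out;

    /// Before: explicit guard plus direct member access.
    ///     if (database)
    ///         out = typeid_cast<const ASTIdentifier &>(*database).name;

    /// After: the guard lives inside the helper; `out` stays "" when database is null.
    getIdentifierName(database, out);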
diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp
index 73aca09c210..017c4ad67ab 100644
--- a/dbms/src/Parsers/ParserInsertQuery.cpp
+++ b/dbms/src/Parsers/ParserInsertQuery.cpp
@@ -9,8 +9,6 @@
 #include
 #include
-#include
-
 namespace DB
 {
@@ -136,14 +134,11 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     }
     else
     {
-        if (database)
-            query->database = typeid_cast<const ASTIdentifier &>(*database).name;
-
-        query->table = typeid_cast<const ASTIdentifier &>(*table).name;
+        getIdentifierName(database, query->database);
+        getIdentifierName(table, query->table);
     }

-    if (format)
-        query->format = typeid_cast<const ASTIdentifier &>(*format).name;
+    getIdentifierName(format, query->format);

     query->columns = columns;
     query->select = select;
diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp
index 835db12cbb3..f749b316794 100644
--- a/dbms/src/Parsers/ParserOptimizeQuery.cpp
+++ b/dbms/src/Parsers/ParserOptimizeQuery.cpp
@@ -5,8 +5,6 @@
 #include
 #include
-#include
-
 namespace DB
 {
@@ -60,10 +58,8 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
     auto query = std::make_shared<ASTOptimizeQuery>();
     node = query;

-    if (database)
-        query->database = typeid_cast<const ASTIdentifier &>(*database).name;
-    if (table)
-        query->table = typeid_cast<const ASTIdentifier &>(*table).name;
+    getIdentifierName(database, query->database);
+    getIdentifierName(table, query->table);

     query->cluster = cluster_str;
     query->partition = partition;
diff --git a/dbms/src/Parsers/ParserQueryWithOutput.cpp b/dbms/src/Parsers/ParserQueryWithOutput.cpp
index 48f0fd9c33b..2e1a5ff529a 100644
--- a/dbms/src/Parsers/ParserQueryWithOutput.cpp
+++ b/dbms/src/Parsers/ParserQueryWithOutput.cpp
@@ -76,7 +76,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
         if (!format_p.parse(pos, query_with_output.format, expected))
             return false;

-        typeid_cast<ASTIdentifier &>(*(query_with_output.format)).setSpecial();
+        setIdentifierSpecial(query_with_output.format);
         query_with_output.children.push_back(query_with_output.format);
     }
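A sketch of what setIdentifierSpecial plausibly does, inferred from the call it replaces here; the patch shows only the setSpecial() member in the removed line, not the helper's definition:

    void setIdentifierSpecial(ASTPtr & ast)
    {
        /// Tolerates null and non-identifier nodes, so callers need no guard.
        if (ast)
            if (auto * id = typeid_cast<ASTIdentifier *>(ast.get()))
                id->setSpecial();   /// mark as database/table/format name rather than a column
    }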
diff --git a/dbms/src/Parsers/ParserRenameQuery.cpp b/dbms/src/Parsers/ParserRenameQuery.cpp
index 6eb8d768df9..aa5fb43742b 100644
--- a/dbms/src/Parsers/ParserRenameQuery.cpp
+++ b/dbms/src/Parsers/ParserRenameQuery.cpp
@@ -4,8 +4,6 @@
 #include
 #include
-#include
-
 namespace DB
 {
@@ -31,8 +29,9 @@ static bool parseDatabaseAndTable(
         return false;
     }

-    db_and_table.database = database ? typeid_cast<const ASTIdentifier &>(*database).name : "";
-    db_and_table.table = typeid_cast<const ASTIdentifier &>(*table).name;
+    db_and_table.database.clear();
+    getIdentifierName(database, db_and_table.database);
+    getIdentifierName(table, db_and_table.table);

     return true;
 }
diff --git a/dbms/src/Parsers/ParserSetQuery.cpp b/dbms/src/Parsers/ParserSetQuery.cpp
index 11f125bb955..14b5b4bec5e 100644
--- a/dbms/src/Parsers/ParserSetQuery.cpp
+++ b/dbms/src/Parsers/ParserSetQuery.cpp
@@ -31,7 +31,7 @@ static bool parseNameValuePair(ASTSetQuery::Change & change, IParser::Pos & pos,
     if (!value_p.parse(pos, value, expected))
         return false;

-    change.name = typeid_cast<const ASTIdentifier &>(*name).name;
+    getIdentifierName(name, change.name);
     change.value = typeid_cast<const ASTLiteral &>(*value).value;

     return true;
diff --git a/dbms/src/Parsers/ParserShowTablesQuery.cpp b/dbms/src/Parsers/ParserShowTablesQuery.cpp
index e4d6b5288d2..dc854883cfe 100644
--- a/dbms/src/Parsers/ParserShowTablesQuery.cpp
+++ b/dbms/src/Parsers/ParserShowTablesQuery.cpp
@@ -65,8 +65,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
             return false;
     }

-    if (database)
-        query->from = typeid_cast<const ASTIdentifier &>(*database).name;
+    getIdentifierName(database, query->from);

     if (like)
         query->like = safeGet<const String &>(typeid_cast<const ASTLiteral &>(*like).value);
diff --git a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp
index 45e92f9e181..f736023e0d5 100644
--- a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp
+++ b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp
@@ -75,10 +75,8 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected &
         }
     }

-    if (database)
-        query->database = typeid_cast<const ASTIdentifier &>(*database).name;
-    if (table)
-        query->table = typeid_cast<const ASTIdentifier &>(*table).name;
+    getIdentifierName(database, query->database);
+    getIdentifierName(table, query->table);

     node = query;
diff --git a/dbms/src/Parsers/ParserUseQuery.cpp b/dbms/src/Parsers/ParserUseQuery.cpp
index 9e521a0d746..a7c66c570b8 100644
--- a/dbms/src/Parsers/ParserUseQuery.cpp
+++ b/dbms/src/Parsers/ParserUseQuery.cpp
@@ -15,16 +15,15 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     ParserKeyword s_use("USE");
     ParserIdentifier name_p;

-    ASTPtr database;
-
     if (!s_use.ignore(pos, expected))
         return false;

+    ASTPtr database;
     if (!name_p.parse(pos, database, expected))
         return false;

     auto query = std::make_shared<ASTUseQuery>();
-    query->database = typeid_cast<const ASTIdentifier &>(*database).name;
+    getIdentifierName(database, query->database);
     node = query;

     return true;
diff --git a/dbms/src/Parsers/parseDatabaseAndTableName.cpp b/dbms/src/Parsers/parseDatabaseAndTableName.cpp
index b7885eb293b..d7a199a3486 100644
--- a/dbms/src/Parsers/parseDatabaseAndTableName.cpp
+++ b/dbms/src/Parsers/parseDatabaseAndTableName.cpp
@@ -2,7 +2,6 @@
 #include
 #include
 #include
-#include

 namespace DB
@@ -30,13 +29,13 @@ bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String &
             return false;
         }

-        database_str = typeid_cast<const ASTIdentifier &>(*database).name;
-        table_str = typeid_cast<const ASTIdentifier &>(*table).name;
+        getIdentifierName(database, database_str);
+        getIdentifierName(table, table_str);
     }
     else
     {
         database_str = "";
-        table_str = typeid_cast<const ASTIdentifier &>(*database).name;
+        getIdentifierName(database, table_str);
     }

     return true;
diff --git a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp
index eaff2e85a9a..2fa71415efb 100644
--- a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp
+++ b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp
@@ -20,7 +20,7 @@ bool parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, Str
         result = typeid_cast<const ASTLiteral &>(*res).value.safeGet<String>();
     }
     else
-        result = typeid_cast<const ASTIdentifier &>(*res).name;
+        result = *getIdentifierName(res);

     return true;
 }
diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index b5fbe0f3314..164ede64fab 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -50,7 +50,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
         }

         if (command_ast->column)
-            command.after_column = typeid_cast<const ASTIdentifier &>(*command_ast->column).name;
+            command.after_column = *getIdentifierName(command_ast->column);

         command.if_not_exists = command_ast->if_not_exists;
@@ -63,7 +63,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
         AlterCommand command;
         command.type = AlterCommand::DROP_COLUMN;

-        command.column_name = typeid_cast<const ASTIdentifier &>(*(command_ast->column)).name;
+        command.column_name = *getIdentifierName(command_ast->column);
         command.if_exists = command_ast->if_exists;
         return command;
     }
@@ -99,8 +99,7 @@ std::optional<AlterCommand> AlterCommand::parse(const ASTAlterCommand * command_
     {
         AlterCommand command;
         command.type = COMMENT_COLUMN;
-        const auto & ast_identifier = typeid_cast<const ASTIdentifier &>(*command_ast->column);
-        command.column_name = ast_identifier.name;
+        command.column_name = *getIdentifierName(command_ast->column);
         const auto & ast_comment = typeid_cast<ASTLiteral &>(*command_ast->comment);
         command.comment = ast_comment.value.get<String>();
         command.if_exists = command_ast->if_exists;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 5e8dbee81d4..850c696b266 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -121,7 +121,7 @@ void MergeTreeWhereOptimizer::optimizeConjunction(ASTSelectQuery & select, ASTFu
         SCOPE_EXIT(++idx);

-        if (cannotBeMoved(condition))
+        if (cannotBeMoved(conditions[idx]))
             continue;

         IdentifierNameSet identifiers{};
@@ -193,7 +193,7 @@ void MergeTreeWhereOptimizer::optimizeArbitrary(ASTSelectQuery & select) const
     auto & condition = select.where_expression;

     /// do not optimize restricted expressions
-    if (cannotBeMoved(select.where_expression.get()))
+    if (cannotBeMoved(select.where_expression))
         return;

     IdentifierNameSet identifiers{};
@@ -250,10 +250,10 @@ bool MergeTreeWhereOptimizer::isConditionGood(const IAST * condition) const
     auto right_arg = function->arguments->children.back().get();

     /// try to ensure left_arg points to ASTIdentifier
-    if (!typeid_cast<const ASTIdentifier *>(left_arg) && typeid_cast<const ASTIdentifier *>(right_arg))
+    if (!isIdentifier(left_arg) && isIdentifier(right_arg))
         std::swap(left_arg, right_arg);

-    if (typeid_cast<const ASTIdentifier *>(left_arg))
+    if (isIdentifier(left_arg))
     {
         /// condition may be "good" if only right_arg is a constant and its value is outside the threshold
         if (const auto literal = typeid_cast<const ASTLiteral *>(right_arg))
@@ -286,8 +286,8 @@ bool MergeTreeWhereOptimizer::isConditionGood(const IAST * condition) const

 void MergeTreeWhereOptimizer::collectIdentifiersNoSubqueries(const IAST * const ast, IdentifierNameSet & set)
 {
-    if (const auto identifier = typeid_cast<const ASTIdentifier *>(ast))
-        return (void) set.insert(identifier->name);
+    if (auto opt_name = getIdentifierName(ast))
+        return (void) set.insert(*opt_name);

     if (typeid_cast<const ASTSubquery *>(ast))
         return;
@@ -364,9 +364,9 @@ bool MergeTreeWhereOptimizer::isSubsetOfTableColumns(const IdentifierNameSet & i
 }

-bool MergeTreeWhereOptimizer::cannotBeMoved(const IAST * ptr) const
+bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const
 {
-    if (const auto function_ptr = typeid_cast<const ASTFunction *>(ptr))
+    if (const auto function_ptr = typeid_cast<const ASTFunction *>(ptr.get()))
     {
         /// disallow arrayJoin expressions to be moved to PREWHERE for now
         if (array_join_function_name == function_ptr->name)
@@ -381,17 +381,16 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const IAST * ptr) const
         if ("indexHint" == function_ptr->name)
             return true;
     }
-    else if (const auto identifier_ptr = typeid_cast<const ASTIdentifier *>(ptr))
+    else if (auto opt_name = getColumnIdentifierName(ptr))
     {
         /// disallow moving result of ARRAY JOIN to PREWHERE
-        if (identifier_ptr->general())
-            if (array_joined_names.count(identifier_ptr->name) ||
-                array_joined_names.count(Nested::extractTableName(identifier_ptr->name)))
-                return true;
+        if (array_joined_names.count(*opt_name) ||
+            array_joined_names.count(Nested::extractTableName(*opt_name)))
+            return true;
     }

     for (const auto & child : ptr->children)
-        if (cannotBeMoved(child.get()))
+        if (cannotBeMoved(child))
             return true;

     return false;
diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
index 4d0ccbcff7e..ccf4970d300 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
+++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h
@@ -68,7 +68,7 @@ private:
      *
      * Also, disallow moving expressions with GLOBAL [NOT] IN.
      */
-    bool cannotBeMoved(const IAST * ptr) const;
+    bool cannotBeMoved(const ASTPtr & ptr) const;

     void determineArrayJoinedNames(ASTSelectQuery & select);
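The rewritten branch folds the removed general() check into getColumnIdentifierName. A plausible shape of that helper, assuming ASTIdentifier keeps a flag that general()/setSpecial() toggle (the flag itself is not shown by this patch):

    std::optional<String> getColumnIdentifierName(const ASTPtr & ast)
    {
        /// A name is returned only while the identifier still has column semantics.
        if (const auto * id = ast ? typeid_cast<const ASTIdentifier *>(ast.get()) : nullptr)
            if (id->general())   /// i.e. not marked special via setIdentifierSpecial()
                return id->name;
        return {};
    }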
diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
index 54b092fdb62..e0903138220 100644
--- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp
@@ -42,13 +42,13 @@ static Names extractColumnNames(const ASTPtr & node)
         Names res;
         res.reserve(elements.size());
         for (const auto & elem : elements)
-            res.push_back(typeid_cast<const ASTIdentifier &>(*elem).name);
+            res.push_back(*getIdentifierName(elem));

         return res;
     }
     else
     {
-        return { typeid_cast<const ASTIdentifier &>(*node).name };
+        return { *getIdentifierName(node) };
     }
 }
@@ -481,9 +481,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)

     if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
     {
-        if (auto ast = typeid_cast<const ASTIdentifier *>(engine_args.back().get()))
-            merging_params.sign_column = ast->name;
-        else
+        if (!getIdentifierName(engine_args.back(), merging_params.sign_column))
             throw Exception(
                 "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def),
                 ErrorCodes::BAD_ARGUMENTS);
@@ -495,9 +493,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
         /// If the last element is not index_granularity or replica_name (a literal), then this is the name of the version column.
         if (!engine_args.empty() && !typeid_cast<const ASTLiteral *>(engine_args.back().get()))
         {
-            if (auto ast = typeid_cast<const ASTIdentifier *>(engine_args.back().get()))
-                merging_params.version_column = ast->name;
-            else
+            if (!getIdentifierName(engine_args.back(), merging_params.version_column))
                 throw Exception(
                     "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def),
                     ErrorCodes::BAD_ARGUMENTS);
@@ -535,18 +531,14 @@ static StoragePtr create(const StorageFactory::Arguments & args)
     }
     else if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing)
     {
-        if (auto ast = typeid_cast<const ASTIdentifier *>(engine_args.back().get()))
-            merging_params.version_column = ast->name;
-        else
+        if (!getIdentifierName(engine_args.back(), merging_params.version_column))
             throw Exception(
                 "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def),
                 ErrorCodes::BAD_ARGUMENTS);

         engine_args.pop_back();

-        if (auto ast = typeid_cast<const ASTIdentifier *>(engine_args.back().get()))
-            merging_params.sign_column = ast->name;
-        else
+        if (!getIdentifierName(engine_args.back(), merging_params.sign_column))
             throw Exception(
                 "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def),
                 ErrorCodes::BAD_ARGUMENTS);
@@ -592,9 +584,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)

     /// Now only three parameters remain - date (or partitioning expression), primary_key, index_granularity.

-    if (auto ast = typeid_cast<const ASTIdentifier *>(engine_args[0].get()))
-        date_column_name = ast->name;
-    else
+    if (!getIdentifierName(engine_args[0], date_column_name))
         throw Exception(
             "Date column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def),
             ErrorCodes::BAD_ARGUMENTS);
diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp
index b6ea7794cbe..c7a228c3e78 100644
--- a/dbms/src/Storages/PartitionCommands.cpp
+++ b/dbms/src/Storages/PartitionCommands.cpp
@@ -2,7 +2,6 @@
 #include
 #include
 #include
-#include

 namespace DB
@@ -66,7 +65,7 @@ std::optional<PartitionCommand> PartitionCommand::parse(const ASTAlterCommand *
         PartitionCommand res;
         res.type = CLEAR_COLUMN;
         res.partition = command_ast->partition;
-        const Field & column_name = typeid_cast<const ASTIdentifier &>(*(command_ast->column)).name;
+        const Field & column_name = *getIdentifierName(command_ast->column);
         res.column_name = column_name;
         return res;
     }
diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp
index de017c8612e..f24badfa82f 100644
--- a/dbms/src/Storages/StorageFile.cpp
+++ b/dbms/src/Storages/StorageFile.cpp
@@ -304,16 +304,16 @@ void registerStorageFile(StorageFactory & factory)
         {
             /// Will use FD if engine_args[1] is int literal or identifier with std* name

-            if (const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(engine_args[1].get()))
+            if (auto opt_name = getIdentifierName(engine_args[1]))
             {
-                if (identifier->name == "stdin")
+                if (*opt_name == "stdin")
                     source_fd = STDIN_FILENO;
-                else if (identifier->name == "stdout")
+                else if (*opt_name == "stdout")
                     source_fd = STDOUT_FILENO;
-                else if (identifier->name == "stderr")
+                else if (*opt_name == "stderr")
                     source_fd = STDERR_FILENO;
                 else
-                    throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor",
+                    throw Exception("Unknown identifier '" + *opt_name + "' in second arg of File storage constructor",
                                     ErrorCodes::UNKNOWN_IDENTIFIER);
             }
             else if (const ASTLiteral * literal = typeid_cast<const ASTLiteral *>(engine_args[1].get()))
diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp
index 97b0af65f87..6dd5cf4c92e 100644
--- a/dbms/src/Storages/StorageHDFS.cpp
+++ b/dbms/src/Storages/StorageHDFS.cpp
@@ -6,7 +6,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -16,7 +15,6 @@
 #include
 #include
 #include
-#include

 namespace DB
diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp
index 8d1d6d52fbf..e188afc1dab 100644
--- a/dbms/src/Storages/StorageJoin.cpp
+++ b/dbms/src/Storages/StorageJoin.cpp
@@ -3,7 +3,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -88,11 +87,11 @@ void registerStorageJoin(StorageFactory & factory)
                 "Storage Join requires at least 3 parameters: Join(ANY|ALL, LEFT|INNER, keys...).",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

-        const ASTIdentifier * strictness_id = typeid_cast<const ASTIdentifier *>(engine_args[0].get());
-        if (!strictness_id)
+        auto opt_strictness_id = getIdentifierName(engine_args[0]);
+        if (!opt_strictness_id)
             throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS);

-        const String strictness_str = Poco::toLower(strictness_id->name);
+        const String strictness_str = Poco::toLower(*opt_strictness_id);
         ASTTableJoin::Strictness strictness;
         if (strictness_str == "any")
             strictness = ASTTableJoin::Strictness::Any;
@@ -101,11 +100,11 @@ void registerStorageJoin(StorageFactory & factory)
         else
             throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS);

-        const ASTIdentifier * kind_id = typeid_cast<const ASTIdentifier *>(engine_args[1].get());
-        if (!kind_id)
+        auto opt_kind_id = getIdentifierName(engine_args[1]);
+        if (!opt_kind_id)
             throw Exception("Second parameter of storage Join must be LEFT or INNER (without quotes).", ErrorCodes::BAD_ARGUMENTS);

-        const String kind_str = Poco::toLower(kind_id->name);
+        const String kind_str = Poco::toLower(*opt_kind_id);
         ASTTableJoin::Kind kind;
         if (kind_str == "left")
             kind = ASTTableJoin::Kind::Left;
@@ -122,11 +121,11 @@ void registerStorageJoin(StorageFactory & factory)
         key_names.reserve(engine_args.size() - 2);
         for (size_t i = 2, size = engine_args.size(); i < size; ++i)
         {
-            const ASTIdentifier * key = typeid_cast<const ASTIdentifier *>(engine_args[i].get());
-            if (!key)
+            auto opt_key = getIdentifierName(engine_args[i]);
+            if (!opt_key)
                 throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS);

-            key_names.push_back(key->name);
+            key_names.push_back(*opt_key);
         }

         auto & settings = args.context.getSettingsRef();
diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp
index 6ce3e58cc75..990a587445c 100644
--- a/dbms/src/Storages/VirtualColumnUtils.cpp
+++ b/dbms/src/Storages/VirtualColumnUtils.cpp
@@ -96,11 +96,9 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns)
         if (!isValidFunction(expression->children[i], columns))
             return false;

-    if (const ASTIdentifier * identifier = typeid_cast<const ASTIdentifier *>(&*expression))
-    {
-        if (identifier->general())
-            return columns.count(identifier->name);
-    }
+    if (auto opt_name = getColumnIdentifierName(expression))
+        return columns.count(*opt_name);
+
     return true;
 }
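The StorageJoin hunk above applies the optional-returning overload three times in a row: parse, test, throw, dereference. If that sequence kept repeating, it could be wrapped as below; requireIdentifier is hypothetical and not part of this patch:

    /// Hypothetical convenience wrapper in the style of the StorageJoin code above.
    static String requireIdentifier(const ASTPtr & arg, const String & what)
    {
        auto opt_name = getIdentifierName(arg);
        if (!opt_name)
            throw Exception(what + " must be an identifier (without quotes).", ErrorCodes::BAD_ARGUMENTS);
        return *opt_name;
    }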
diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp
index 0131d9f2162..aea176def3b 100644
--- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp
+++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp
@@ -76,7 +76,7 @@ static bool isCompatible(const IAST & node)
             return true;
     }

-    if (typeid_cast<const ASTIdentifier *>(&node))
+    if (isIdentifier(&node))
         return true;

     return false;
diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp
index fc23956ef4f..5f81a9c21ec 100644
--- a/dbms/src/TableFunctions/TableFunctionRemote.cpp
+++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp
@@ -65,9 +65,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
         }
         else
         {
-            if (auto ast_cluster = typeid_cast<const ASTIdentifier *>(args[arg_num].get()))
-                cluster_name = ast_cluster->name;
-            else
+            if (!getIdentifierName(args[arg_num], cluster_name))
                 cluster_description = getStringLiteral(*args[arg_num], "Hosts pattern");
         }
         ++arg_num;
@@ -132,9 +130,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
     /// ExpressionAnalyzer will be created in InterpreterSelectQuery that will meet these `Identifier` when processing the request.
     /// We need to mark them as the name of the database or table, because the default value is column.
-    for (auto & arg : args)
-        if (ASTIdentifier * id = typeid_cast<ASTIdentifier *>(arg.get()))
-            id->setSpecial();
+    for (auto ast : args)
+        setIdentifierSpecial(ast);

     ClusterPtr cluster;
     if (!cluster_name.empty())