From 24caaec50c7a47c2190bb266c0cf4787fb253964 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 2 Apr 2018 20:17:59 +0300 Subject: [PATCH 001/192] Squashed commit of the following: commit bedcf840b563aad3edb03b43417338fab0e7cb48 Author: proller Date: Mon Apr 2 20:17:36 2018 +0300 Revert "Prepare to new poco (PocoData renamed to PocoSQL)" This reverts commit ad5e11ad88ac4f1c3a5ad21153042e2498ca0d68. commit b7f1c352f0eb132b133846c7214e70a79f26e611 Merge: ad5e11ad8 fb7e2cbd1 Author: proller Date: Mon Apr 2 20:12:10 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit ad5e11ad88ac4f1c3a5ad21153042e2498ca0d68 Author: proller Date: Mon Apr 2 20:09:49 2018 +0300 Prepare to new poco (PocoData renamed to PocoSQL) commit fcb90ca39dd32a29e29eb68bf559f381e80f74b4 Merge: 9ded77d62 ad137994f Author: proller Date: Mon Apr 2 13:17:01 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 9ded77d62aa668c6b7b55d209d5760bc5b517fbf Merge: 14cea9052 412edac65 Author: proller Date: Fri Mar 30 21:06:20 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 14cea90524b8b977bff9b85647e00f0e1c26570b Merge: 9b6d88e67 82932f904 Author: proller Date: Fri Mar 30 14:55:42 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 9b6d88e67b114a4c42b624690d691988be39f227 Merge: 0afe7b7d1 b99783028 Author: proller Date: Thu Mar 29 20:35:15 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 0afe7b7d1f4792403ba4fb33dfb250ece2edc41d Author: proller Date: Wed Mar 28 16:03:55 2018 +0300 add docker/test commit c46f0b4084610a6d36b6823fb5ee48381866272e Author: proller Date: Wed Mar 28 15:43:53 2018 +0300 fix commit 3435dee49f31fe8f6cd9b01da4a2d5820f03a4a4 Author: proller Date: Wed Mar 28 15:18:54 2018 +0300 Fix query compile in docker, update docker image to ubuntu 17.10 artful --- docker/client/Dockerfile | 5 +++-- docker/server/docker_related_config.xml | 6 ++++++ docker/test/Dockerfile | 19 +++++++++++++++++++ docker/test/README.md | 5 +++++ 4 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 docker/test/Dockerfile create mode 100644 docker/test/README.md diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 9c8f3d087a3..59998b4a507 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,11 +1,12 @@ -FROM ubuntu:16.04 +FROM ubuntu:17.10 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG version=\* RUN apt-get update && \ - apt-get install -y apt-transport-https && \ + apt-get install -y apt-transport-https dirmngr && \ mkdir -p /etc/apt/sources.list.d && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ apt-get install --allow-unauthenticated -y clickhouse-client=$version locales && \ diff --git a/docker/server/docker_related_config.xml b/docker/server/docker_related_config.xml index ab6f82ad4c4..e1df3bb3890 100644 --- a/docker/server/docker_related_config.xml +++ b/docker/server/docker_related_config.xml @@ -3,4 +3,10 @@ 0.0.0.0 :: 1 + + diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile new file mode 100644 index 00000000000..75c9eba4d3b --- /dev/null +++ b/docker/test/Dockerfile @@ -0,0 +1,19 @@ +FROM ubuntu:17.10 + +ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" +ARG version=\* + +RUN apt-get update && \ + apt-get install -y apt-transport-https dirmngr && \ + mkdir -p /etc/apt/sources.list.d && \ + apt-key 
adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \
+    echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \
+    apt-get update && \
+    apt-get install --allow-unauthenticated -y clickhouse-test && \
+    rm -rf /var/lib/apt/lists/* /var/cache/debconf && \
+    apt-get clean
+
+# clickhouse-test bug: dotn start without server config, remove after release 1.1.54372 :
+RUN mkdir -p /etc/clickhouse-server && echo "<yandex></yandex>" > /etc/clickhouse-server/config.xml
+
+ENTRYPOINT ["/usr/bin/clickhouse-test"]
diff --git a/docker/test/README.md b/docker/test/README.md
new file mode 100644
index 00000000000..0833aacb822
--- /dev/null
+++ b/docker/test/README.md
@@ -0,0 +1,5 @@
+# ClickHouse Test Docker Image
+
+## License
+
+View [license information](https://github.com/yandex/ClickHouse/blob/master/LICENSE) for the software contained in this image.

From 8a718b4e20a76b92c019ad49fde37344af2bde02 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Sat, 7 Apr 2018 06:42:57 +0300
Subject: [PATCH 002/192] Update Dockerfile

---
 docker/test/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile
index 75c9eba4d3b..5dfbf73d255 100644
--- a/docker/test/Dockerfile
+++ b/docker/test/Dockerfile
@@ -13,7 +13,7 @@ RUN apt-get update && \
     rm -rf /var/lib/apt/lists/* /var/cache/debconf && \
     apt-get clean
 
-# clickhouse-test bug: dotn start without server config, remove after release 1.1.54372 :
+# clickhouse-test bug: it doesn't start without server config, remove after release 1.1.54372 :
 RUN mkdir -p /etc/clickhouse-server && echo "<yandex></yandex>" > /etc/clickhouse-server/config.xml
 
 ENTRYPOINT ["/usr/bin/clickhouse-test"]

From 6d7bd19ebd0884167efe2605c4f578d74f6bb6f6 Mon Sep 17 00:00:00 2001
From: Alexey Zatelepin
Date: Fri, 6 Apr 2018 19:06:07 +0300
Subject: [PATCH 003/192] fix races in leader election [#CLICKHOUSE-3533]

---
 dbms/src/Common/ZooKeeper/LeaderElection.h    | 22 +++---
 .../ReplicatedMergeTreeCleanupThread.cpp      |  2 +-
 .../ReplicatedMergeTreeRestartingThread.cpp   | 49 +++---------
 .../Storages/StorageReplicatedMergeTree.cpp   | 74 ++++++++++++++-----
 .../src/Storages/StorageReplicatedMergeTree.h | 15 ++--
 5 files changed, 88 insertions(+), 74 deletions(-)

diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h
index 8dd9b1831b1..1786cc76510 100644
--- a/dbms/src/Common/ZooKeeper/LeaderElection.h
+++ b/dbms/src/Common/ZooKeeper/LeaderElection.h
@@ -41,10 +41,15 @@ public:
         createNode();
     }
 
-    void yield()
+    void shutdown()
     {
-        releaseNode();
-        createNode();
+        if (shutdown_called)
+            return;
+
+        shutdown_called = true;
+        event->set();
+        if (thread.joinable())
+            thread.join();
     }
 
     ~LeaderElection()
@@ -62,14 +67,14 @@ private:
     std::string node_name;
 
     std::thread thread;
-    std::atomic<bool> shutdown {false};
+    std::atomic<bool> shutdown_called {false};
     zkutil::EventPtr event = std::make_shared<Poco::Event>();
 
     CurrentMetrics::Increment metric_increment{CurrentMetrics::LeaderElection};
 
     void createNode()
     {
-        shutdown = false;
+        shutdown_called = false;
         node = EphemeralNodeHolder::createSequential(path + "/leader_election-", zookeeper, identifier);
 
         std::string node_path = node->getPath();
@@ -80,16 +85,13 @@
     void releaseNode()
    {
-        shutdown = true;
-        event->set();
-        if (thread.joinable())
-            thread.join();
+        shutdown();
         node = nullptr;
     }
 
     void threadFunction()
     {
-        while (!shutdown)
+        while (!shutdown_called)
         {
             bool success = false;
 
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index f7dca129bd3..fb3276fcfab 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -52,7 +52,7 @@ void ReplicatedMergeTreeCleanupThread::iterate()
 
     /// This is loose condition: no problem if we actually had lost leadership at this moment
     ///  and two replicas will try to do cleanup simultaneously.
-    if (storage.is_leader_node)
+    if (storage.is_leader)
     {
         clearOldLogs();
         clearOldBlocks();
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
index 6b20b5c86c1..1cd958c60d6 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
@@ -17,7 +17,6 @@ namespace ProfileEvents
 namespace CurrentMetrics
 {
     extern const Metric ReadonlyReplica;
-    extern const Metric LeaderReplica;
 }
 
 
@@ -139,7 +138,7 @@ void ReplicatedMergeTreeRestartingThread::run()
                     prev_time_of_check_delay = current_time;
 
                     /// We give up leadership if the relative lag is greater than threshold.
-                    if (storage.is_leader_node
+                    if (storage.is_leader
                         && relative_delay > static_cast<time_t>(storage.data.settings.min_relative_delay_to_yield_leadership))
                     {
                         LOG_INFO(log, "Relative replica delay (" << relative_delay << " seconds) is bigger than threshold ("
@@ -147,11 +146,11 @@
 
                         ProfileEvents::increment(ProfileEvents::ReplicaYieldLeadership);
 
-                        storage.is_leader_node = false;
-                        CurrentMetrics::sub(CurrentMetrics::LeaderReplica);
-                        if (storage.merge_selecting_thread.joinable())
-                            storage.merge_selecting_thread.join();
-                        storage.leader_election->yield();
+                        storage.exitLeaderElection();
+                        /// NOTE: enterLeaderElection() can throw if node creation in ZK fails.
+                        /// This is bad because we can end up without a leader on any replica.
+                        /// In this case we rely on the fact that the session will expire and we will reconnect.
+                        storage.enterLeaderElection();
                     }
                 }
             }
@@ -169,6 +168,8 @@ void ReplicatedMergeTreeRestartingThread::run()
     storage.data_parts_exchange_endpoint_holder->cancelForever();
     storage.data_parts_exchange_endpoint_holder = nullptr;
 
+    /// Cancel fetches and merges to force the queue_task to finish ASAP.
+    storage.fetcher.blocker.cancelForever();
     storage.merger.merges_blocker.cancelForever();
 
     partialShutdown();
@@ -195,12 +196,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup()
         updateQuorumIfWeHavePart();
 
         if (storage.data.settings.replicated_can_become_leader)
-            storage.leader_election = std::make_shared<zkutil::LeaderElection>(
-                storage.zookeeper_path + "/leader_election",
-                *storage.current_zookeeper,     /// current_zookeeper lives for the lifetime of leader_election,
-                                                /// since before changing `current_zookeeper`, `leader_election` object is destroyed in `partialShutdown` method.
-                [this] { storage.becomeLeader(); CurrentMetrics::add(CurrentMetrics::LeaderReplica); },
-                storage.replica_name);
+            storage.enterLeaderElection();
 
         /// Anything above can throw a KeeperException if something is wrong with ZK.
         /// Anything below should not throw exceptions.
@@ -222,7 +218,6 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup()
    catch (...)
{ storage.replica_is_active_node = nullptr; - storage.leader_election = nullptr; try { @@ -366,17 +361,9 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() storage.replica_is_active_node = nullptr; LOG_TRACE(log, "Waiting for threads to finish"); - { - std::lock_guard lock(storage.leader_node_mutex); - bool old_val = true; - if (storage.is_leader_node.compare_exchange_strong(old_val, false)) - { - CurrentMetrics::sub(CurrentMetrics::LeaderReplica); - if (storage.merge_selecting_thread.joinable()) - storage.merge_selecting_thread.join(); - } - } + storage.exitLeaderElection(); + if (storage.queue_updating_thread.joinable()) storage.queue_updating_thread.join(); @@ -384,20 +371,6 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() storage.alter_thread.reset(); storage.part_check_thread.stop(); - /// Yielding leadership only after finish of merge_selecting_thread. - /// Otherwise race condition with parallel run of merge selecting thread on different servers is possible. - /// - /// On the other hand, leader_election could call becomeLeader() from own thread after - /// merge_selecting_thread is finished and restarting_thread is destroyed. - /// becomeLeader() recreates merge_selecting_thread and it becomes joinable again, even restarting_thread is destroyed. - /// But restarting_thread is responsible to stop merge_selecting_thread. - /// It will lead to std::terminate in ~StorageReplicatedMergeTree(). - /// Such behaviour was rarely observed on DROP queries. - /// Therefore we need either avoid becoming leader after first shutdown call (more deliberate choice), - /// either manually wait merge_selecting_thread.join() inside ~StorageReplicatedMergeTree(), either or something third. - /// So, we added shutdown check in becomeLeader() and made its creation and deletion atomic. 
-    storage.leader_election = nullptr;
-
     LOG_TRACE(log, "Threads finished");
 }
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 39213653478..1548537e390 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -59,6 +59,12 @@ namespace ProfileEvents
     extern const Event DataAfterMergeDiffersFromReplica;
 }
 
+namespace CurrentMetrics
+{
+    extern const Metric LeaderReplica;
+}
+
+
 namespace DB
 {
 
@@ -1883,7 +1889,7 @@ void StorageReplicatedMergeTree::mergeSelectingThread()
             && cached_merging_predicate.get(now, uncached_merging_predicate, merging_predicate_args_to_key, left, right);
     };
 
-    while (!shutdown_called && is_leader_node)
+    while (is_leader)
     {
         bool success = false;
 
@@ -1932,7 +1938,7 @@ void StorageReplicatedMergeTree::mergeSelectingThread()
             tryLogCurrentException(__PRETTY_FUNCTION__);
         }
 
-        if (shutdown_called || !is_leader_node)
+        if (!is_leader)
             break;
 
         if (!success)
@@ -2037,23 +2043,55 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
 }
 
 
-void StorageReplicatedMergeTree::becomeLeader()
+void StorageReplicatedMergeTree::enterLeaderElection()
 {
-    std::lock_guard<std::mutex> lock(leader_node_mutex);
+    auto callback = [this]()
+    {
+        CurrentMetrics::add(CurrentMetrics::LeaderReplica);
+        LOG_INFO(log, "Became leader");
 
-    if (shutdown_called)
+        is_leader = true;
+        merge_selecting_thread = std::thread(&StorageReplicatedMergeTree::mergeSelectingThread, this);
+    };
+
+    try
+    {
+        leader_election = std::make_shared<zkutil::LeaderElection>(
+            zookeeper_path + "/leader_election",
+            *current_zookeeper,     /// current_zookeeper lives for the lifetime of leader_election,
+                                    /// since before changing `current_zookeeper`, `leader_election` object is destroyed in `partialShutdown` method.
+            callback,
+            replica_name);
+    }
+    catch (...)
+    {
+        leader_election = nullptr;
+        throw;
+    }
+}
+
+void StorageReplicatedMergeTree::exitLeaderElection()
+{
+    if (!leader_election)
         return;
 
-    if (merge_selecting_thread.joinable())
+    /// Shut down the leader election thread to avoid suddenly becoming the leader again after
+    /// we have stopped the merge_selecting_thread, but before we have deleted the leader_election object.
+    leader_election->shutdown();
+
+    if (is_leader)
     {
-        LOG_INFO(log, "Deleting old leader");
-        is_leader_node = false; /// exit trigger inside thread
+        CurrentMetrics::sub(CurrentMetrics::LeaderReplica);
+        LOG_INFO(log, "Stopped being leader");
+
+        is_leader = false;
+        merge_selecting_event.set();
         merge_selecting_thread.join();
     }
 
-    LOG_INFO(log, "Became leader");
-    is_leader_node = true;
-    merge_selecting_thread = std::thread(&StorageReplicatedMergeTree::mergeSelectingThread, this);
+    /// Delete the node in ZK only after we have stopped the merge_selecting_thread - so that only one
+    /// replica assigns merges at any given time.
+    leader_election = nullptr;
 }
 
 
@@ -2382,12 +2420,6 @@ void StorageReplicatedMergeTree::startup()
 
 void StorageReplicatedMergeTree::shutdown()
 {
-    /** This must be done before waiting for restarting_thread.
-      * Because restarting_thread will wait for finishing of tasks in background pool,
-      * and parts are fetched in that tasks.
-      */
-    fetcher.blocker.cancelForever();
-
     if (restarting_thread)
     {
         restarting_thread->stop();
@@ -2399,6 +2431,8 @@ void StorageReplicatedMergeTree::shutdown()
         data_parts_exchange_endpoint_holder->cancelForever();
         data_parts_exchange_endpoint_holder = nullptr;
     }
+
+    fetcher.blocker.cancelForever();
 }
 
 
@@ -2487,7 +2521,7 @@ bool StorageReplicatedMergeTree::optimize(const ASTPtr & query, const ASTPtr & p
 {
     assertNotReadonly();
 
-    if (!is_leader_node)
+    if (!is_leader)
     {
         sendRequestToLeaderReplica(query, context.getSettingsRef());
         return true;
@@ -2813,7 +2847,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPt
 
     zkutil::ZooKeeperPtr zookeeper = getZooKeeper();
 
-    if (!is_leader_node)
+    if (!is_leader)
     {
         sendRequestToLeaderReplica(query, context.getSettingsRef());
         return;
@@ -3171,7 +3205,7 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields)
 {
     auto zookeeper = tryGetZooKeeper();
 
-    res.is_leader = is_leader_node;
+    res.is_leader = is_leader;
     res.is_readonly = is_readonly;
     res.is_session_expired = !zookeeper || zookeeper->expired();
 
diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h
index c2b09a77bf1..5d0659f19f5 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.h
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.h
@@ -220,8 +220,8 @@ private:
 
     /** Is this replica "leading". The leader replica selects the parts to merge.
       */
-    std::atomic_bool is_leader_node {false};
-    std::mutex leader_node_mutex;
+    std::atomic<bool> is_leader {false};
+    zkutil::LeaderElectionPtr leader_election;
 
     InterserverIOEndpointHolderPtr data_parts_exchange_endpoint_holder;
 
@@ -239,7 +239,6 @@ private:
 
     DataPartsExchange::Fetcher fetcher;
 
-    zkutil::LeaderElectionPtr leader_election;
 
     /// When activated, replica is initialized and startup() method could exit
     Poco::Event startup_event;
@@ -368,9 +367,15 @@ private:
      */
    bool queueTask();
 
-    /// Select the parts to merge.
+    /// Postcondition:
+    /// either leader_election is fully initialized (node in ZK is created and the watching thread is launched)
+    /// or an exception is thrown and leader_election is destroyed.
+    void enterLeaderElection();
 
-    void becomeLeader();
+    /// Postcondition:
+    /// is_leader is false, merge_selecting_thread is stopped, leader_election is nullptr.
+    /// leader_election node in ZK is either deleted, or the session is marked expired.
+    void exitLeaderElection();
 
     /** Selects the parts to merge and writes to the log.
*/ From 1e4544e05baf76ec1202e681e63504536c5017eb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Apr 2018 19:26:38 +0300 Subject: [PATCH 004/192] Actualized test #1846 --- .../queries/0_stateless/00441_nulls_in.reference | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00441_nulls_in.reference b/dbms/tests/queries/0_stateless/00441_nulls_in.reference index 591e55ae41a..81e812f596f 100644 --- a/dbms/tests/queries/0_stateless/00441_nulls_in.reference +++ b/dbms/tests/queries/0_stateless/00441_nulls_in.reference @@ -5,12 +5,12 @@ 0 0 1 -\N +0 1 0 0 1 -\N +0 1 0 0 @@ -27,7 +27,7 @@ 1 0 1 -\N +0 0 1 0 @@ -35,12 +35,12 @@ 0 0 1 -\N +0 1 0 0 1 -\N +0 1 0 0 @@ -57,7 +57,7 @@ 1 0 1 -\N +0 0 1 0 From 1beab90244b171263467f458d572360555687c34 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Apr 2018 21:09:20 +0300 Subject: [PATCH 005/192] Fixed totally wrong code in SummingMergeTree in the case of complex maps [#CLICKHOUSE-2] --- contrib/poco | 2 +- .../SummingSortedBlockInputStream.cpp | 17 ++++++++++++++--- .../00327_summing_composite_nested.sql | 7 +------ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/contrib/poco b/contrib/poco index 930a7ec1154..a107b0c9cee 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 930a7ec1154f4f9711edfb4b4a39f9fff2a5bbb5 +Subproject commit a107b0c9cee109fe0abfbf509df3c78a1e0c05fa diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp index eeda7f50e75..e79366ca02d 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp @@ -330,7 +330,20 @@ void SummingSortedBlockInputStream::merge(MutableColumns & merged_columns, std:: // Start aggregations with current row addRow(current); - current_row_is_zero = true; + + if (maps_to_sum.empty()) + { + /// We have only columns_to_aggregate. The status of current row will be determined + /// in 'insertCurrentRowIfNeeded' method on the values of aggregate functions. + current_row_is_zero = true; + } + else + { + /// We have complex maps that will be summed with 'mergeMap' method. + /// The single row is considered non zero, and the status after merging with other rows + /// will be determined in the branch below (when key_differs == false). 
+ current_row_is_zero = false; + } } else { @@ -338,10 +351,8 @@ void SummingSortedBlockInputStream::merge(MutableColumns & merged_columns, std:: // Merge maps only for same rows for (const auto & desc : maps_to_sum) - { if (mergeMap(desc, current_row, current)) current_row_is_zero = false; - } } if (!current->isLast()) diff --git a/dbms/tests/queries/0_stateless/00327_summing_composite_nested.sql b/dbms/tests/queries/0_stateless/00327_summing_composite_nested.sql index 43b37616941..e21389528e4 100644 --- a/dbms/tests/queries/0_stateless/00327_summing_composite_nested.sql +++ b/dbms/tests/queries/0_stateless/00327_summing_composite_nested.sql @@ -1,12 +1,7 @@ DROP TABLE IF EXISTS test.summing_composite_key; CREATE TABLE test.summing_composite_key (d Date, k UInt64, FirstMap Nested(k1 UInt32, k2ID Int8, s Float64), SecondMap Nested(k1ID UInt64, k2Key UInt32, k3Type Int32, s Int64)) ENGINE = SummingMergeTree(d, k, 1); -INSERT INTO test.summing_composite_key VALUES ('2000-01-01', 1, [1,2], [3,4], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]); -INSERT INTO test.summing_composite_key VALUES ('2000-01-01', 1, [2,1], [4,3], [20,22], [2,2,1], [5,5,0], [-3,-3,-33], [10,100,1000]); - -INSERT INTO test.summing_composite_key VALUES ('2000-01-01', 2, [1,2], [3,4], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]); -INSERT INTO test.summing_composite_key VALUES ('2000-01-01', 2, [2,1,1], [4,3,3], [20,22,33], [2,2], [5,5], [-3,-3], [10,100]); -INSERT INTO test.summing_composite_key VALUES ('2000-01-01', 2, [1,2], [3,4], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]); +INSERT INTO test.summing_composite_key VALUES ('2000-01-01', 1, [1,2], [3,4], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]), ('2000-01-01', 1, [2,1], [4,3], [20,22], [2,2,1], [5,5,0], [-3,-3,-33], [10,100,1000]), ('2000-01-01', 2, [1,2], [3,4], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]), ('2000-01-01', 2, [2,1,1], [4,3,3], [20,22,33], [2,2], [5,5], [-3,-3], [10,100]), ('2000-01-01', 2, [1,2], [3,4], [10,11], [0,1,2], [3,4,5], [-1,-2,-3], [1,10,100]); SELECT * FROM test.summing_composite_key ORDER BY d, k, _part_index; From 813201d2f6fcda7a2430b5a8c4b26a6040270f95 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 6 Apr 2018 21:58:26 +0300 Subject: [PATCH 006/192] revert reverted submodule contrib/poco --- contrib/poco | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/poco b/contrib/poco index a107b0c9cee..930a7ec1154 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit a107b0c9cee109fe0abfbf509df3c78a1e0c05fa +Subproject commit 930a7ec1154f4f9711edfb4b4a39f9fff2a5bbb5 From 6dc45af5ae08cbda447a89f3133b1de30cd28565 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Apr 2018 22:43:37 +0300 Subject: [PATCH 007/192] Avoid herd effect in ReplicatedMergeTreeCleanupThread [#CLICKHOUSE-2] --- dbms/src/Storages/MergeTree/MergeTreeSettings.h | 3 +++ .../Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 5 ++++- .../Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h | 3 +++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 424c634afaf..aa29dccc195 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -104,6 +104,9 @@ struct MergeTreeSettings \ /** Period to clean old queue logs, blocks hashes and parts */ \ M(SettingUInt64, cleanup_delay_period, 30) \ + /** Add uniformly distributed value 
from 0 to x seconds to cleanup_delay_period \
+      to avoid thundering herd effect and subsequent DoS of ZooKeeper in case of very large number of tables */ \
+    M(SettingUInt64, cleanup_delay_period_random_add, 10) \
     \
     /** Minimal delay from other replicas to yield leadership. Here and further 0 means unlimited. */ \
     M(SettingUInt64, min_relative_delay_to_yield_leadership, 120) \
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index fb3276fcfab..e3d090f4212 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -3,6 +3,8 @@
 #include
 #include
+#include <random>
+
 namespace DB
 {
 
@@ -25,7 +27,8 @@ void ReplicatedMergeTreeCleanupThread::run()
 {
     setThreadName("ReplMTCleanup");
 
-    const auto CLEANUP_SLEEP_MS = storage.data.settings.cleanup_delay_period * 1000;
+    const auto CLEANUP_SLEEP_MS = (storage.data.settings.cleanup_delay_period
+        + std::uniform_int_distribution<UInt64>(0, storage.data.settings.cleanup_delay_period_random_add)(rng)) * 1000;
 
     while (!storage.shutdown_called)
     {
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
index c717e84bfd5..ccbb564fa96 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h
@@ -7,6 +7,8 @@
 #include
 #include
+#include <pcg_random.hpp>
+
 namespace DB
 {
 
@@ -27,6 +29,7 @@ private:
     StorageReplicatedMergeTree & storage;
     Logger * log;
     std::thread thread;
+    pcg64 rng;
 
     void run();
     void iterate();

From 62ccbba0b76a1b6fff4bcf506ccc4013133ec4b4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 6 Apr 2018 22:44:55 +0300
Subject: [PATCH 008/192] Avoid herd effect in ReplicatedMergeTreeCleanupThread
 (continued) [#CLICKHOUSE-2]

---
 contrib/poco                                                | 2 +-
 .../Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/contrib/poco b/contrib/poco
index 930a7ec1154..a107b0c9cee 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit 930a7ec1154f4f9711edfb4b4a39f9fff2a5bbb5
+Subproject commit a107b0c9cee109fe0abfbf509df3c78a1e0c05fa
diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index e3d090f4212..9ef2618ebc8 100644
--- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -27,8 +27,8 @@ void ReplicatedMergeTreeCleanupThread::run()
 {
     setThreadName("ReplMTCleanup");
 
-    const auto CLEANUP_SLEEP_MS = (storage.data.settings.cleanup_delay_period
-        + std::uniform_int_distribution<UInt64>(0, storage.data.settings.cleanup_delay_period_random_add)(rng)) * 1000;
+    const auto CLEANUP_SLEEP_MS = storage.data.settings.cleanup_delay_period * 1000
+        + std::uniform_int_distribution<UInt64>(0, storage.data.settings.cleanup_delay_period_random_add * 1000)(rng);
 
     while (!storage.shutdown_called)
     {

From a786e92a0f250fbf2c40e2cd522b1da560625aac Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 6 Apr 2018 22:48:54 +0300
Subject: [PATCH 009/192] Fixed typo [#CLICKHOUSE-2]

---
 dbms/src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++-----
 dbms/src/Storages/StorageReplicatedMergeTree.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 1548537e390..8273ef5cb8f 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3671,7 +3671,7 @@ void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, - NameSet * parts_should_be_retied) + NameSet * parts_should_be_retried) { zkutil::Requests ops; auto it_first_node_in_batch = part_names.cbegin(); @@ -3702,9 +3702,9 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & { LOG_DEBUG(log, "There is no part " << *it_in_batch << " in ZooKeeper, it was only in filesystem"); } - else if (parts_should_be_retied && zkutil::isHardwareError(cur_code)) + else if (parts_should_be_retried && zkutil::isHardwareError(cur_code)) { - parts_should_be_retied->emplace(*it_in_batch); + parts_should_be_retried->emplace(*it_in_batch); } else if (cur_code) { @@ -3712,10 +3712,10 @@ void StorageReplicatedMergeTree::removePartsFromZooKeeper(zkutil::ZooKeeperPtr & } } } - else if (parts_should_be_retied && zkutil::isHardwareError(code)) + else if (parts_should_be_retried && zkutil::isHardwareError(code)) { for (auto it_in_batch = it_first_node_in_batch; it_in_batch != it_next; ++it_in_batch) - parts_should_be_retied->emplace(*it_in_batch); + parts_should_be_retried->emplace(*it_in_batch); } else if (code) { diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index 5d0659f19f5..457e834ea1c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -333,7 +333,7 @@ private: /// Quickly removes big set of parts from ZooKeeper (using async multi queries) void removePartsFromZooKeeper(zkutil::ZooKeeperPtr & zookeeper, const Strings & part_names, - NameSet * parts_should_be_retied = nullptr); + NameSet * parts_should_be_retried = nullptr); /// Removes a part from ZooKeeper and adds a task to the queue to download it. It is supposed to do this with broken parts. void removePartAndEnqueueFetch(const String & part_name); From 40d70184a7a3523211fd70523922d2e5aaf13f74 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 6 Apr 2018 22:54:18 +0300 Subject: [PATCH 010/192] ZooKeeper: fixed error [#CLICKHOUSE-2] --- dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 7b105ddec5a..37ff3078d75 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/dbms/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -714,8 +714,8 @@ void ZooKeeper::sendThread() RequestInfo info; if (requests_queue.tryPop(info, max_wait)) { - if (expired) - break; + /// After we popped element from the queue, we must register callbacks (even in the case when expired == true right now), + /// because they must not be lost (callbacks must be called because the user will wait for them). 
                if (info.request->xid != close_xid)
                {
@@ -732,6 +732,9 @@ void ZooKeeper::sendThread()
                        watches[info.request->getPath()].emplace_back(std::move(info.watch));
                }
 
+                if (expired)
+                    break;
+
                info.request->write(*out);
 
                if (info.request->xid == close_xid)

From c1a077121f153164827bc6e20826966b278a58a8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 6 Apr 2018 23:03:35 +0300
Subject: [PATCH 011/192] Fixed build with clang 5 (although it is Ok on clang
 6) [#CLICKHOUSE-2]

---
 dbms/src/Common/tests/dump_variable.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/src/Common/tests/dump_variable.cpp b/dbms/src/Common/tests/dump_variable.cpp
index d7fcdb4bdb7..3213435ab17 100644
--- a/dbms/src/Common/tests/dump_variable.cpp
+++ b/dbms/src/Common/tests/dump_variable.cpp
@@ -37,7 +37,7 @@ int main(int, char **)
     std::initializer_list<const char *> list{"hello", "world"};
     DUMP(list);
 
-    std::array<const char *, 2> arr{"hello", "world"};
+    std::array<const char *, 2> arr{{"hello", "world"}};
     DUMP(arr);
 
     //DUMP([]{});

From 13ec841ab88fb9ff81c25deb92011b219c95c6fe Mon Sep 17 00:00:00 2001
From: proller
Date: Fri, 6 Apr 2018 23:22:33 +0300
Subject: [PATCH 012/192] revert reverted submodule contrib/poco

---
 contrib/poco | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/poco b/contrib/poco
index a107b0c9cee..930a7ec1154 160000
--- a/contrib/poco
+++ b/contrib/poco
@@ -1 +1 @@
-Subproject commit a107b0c9cee109fe0abfbf509df3c78a1e0c05fa
+Subproject commit 930a7ec1154f4f9711edfb4b4a39f9fff2a5bbb5

From 1fc4d30bb604e92d490408b771c506cb5639e409 Mon Sep 17 00:00:00 2001
From: proller
Date: Fri, 6 Apr 2018 13:49:42 -0700
Subject: [PATCH 013/192] Debian: temporary keep packages with old names
 (#2186)

* cmake fixes

* Fix test server config

* Debian: temporary keep packages with old names

* fix

* fix

* Debian postinst: adjustable user
---
 debian/clickhouse-client.postinst       |  4 +++-
 debian/clickhouse-server-base.cron.d    |  1 +
 debian/clickhouse-server-base.install   | 11 +++++++++++
 debian/clickhouse-server-base.postinst  |  1 +
 debian/clickhouse-server-base.preinst   |  1 +
 debian/clickhouse-server-base.prerm     |  1 +
 debian/clickhouse-server-base.service   |  1 +
 debian/clickhouse-server-common.install |  2 ++
 debian/clickhouse-server.postinst       |  8 ++++----
 debian/control                          | 14 ++++++++++++++
 debian/rules                            |  5 +++++
 release                                 |  2 +-
 12 files changed, 45 insertions(+), 6 deletions(-)
 create mode 120000 debian/clickhouse-server-base.cron.d
 create mode 100644 debian/clickhouse-server-base.install
 create mode 120000 debian/clickhouse-server-base.postinst
 create mode 120000 debian/clickhouse-server-base.preinst
 create mode 120000 debian/clickhouse-server-base.prerm
 create mode 120000 debian/clickhouse-server-base.service
 create mode 100644 debian/clickhouse-server-common.install

diff --git a/debian/clickhouse-client.postinst b/debian/clickhouse-client.postinst
index 355676990b6..ff54e3a58fc 100644
--- a/debian/clickhouse-client.postinst
+++ b/debian/clickhouse-client.postinst
@@ -1,7 +1,9 @@
 #!/bin/sh
 set -e
 
+CLICKHOUSE_USER=${CLICKHOUSE_USER=clickhouse}
+
 mkdir -p /etc/clickhouse-client/conf.d
 
 # user created by clickhouse-server package
-chown -R clickhouse /etc/clickhouse-client || true
+chown -R ${CLICKHOUSE_USER} /etc/clickhouse-client || true
diff --git a/debian/clickhouse-server-base.cron.d b/debian/clickhouse-server-base.cron.d
new file mode 120000
index 00000000000..23e744386dd
--- /dev/null
+++ b/debian/clickhouse-server-base.cron.d
@@ -0,0 +1 @@
+clickhouse-server.cron.d
\ No newline at end of file
diff --git
a/debian/clickhouse-server-base.install b/debian/clickhouse-server-base.install new file mode 100644 index 00000000000..971955da925 --- /dev/null +++ b/debian/clickhouse-server-base.install @@ -0,0 +1,11 @@ +usr/bin/clickhouse +usr/bin/clickhouse-server +usr/bin/clickhouse-clang +usr/bin/clickhouse-lld +usr/bin/clickhouse-copier +usr/bin/clickhouse-report +etc/systemd/system/clickhouse-server.service +etc/init.d/clickhouse-server +etc/cron.d/clickhouse-server +usr/share/clickhouse/* +etc/security/limits.d/clickhouse.conf diff --git a/debian/clickhouse-server-base.postinst b/debian/clickhouse-server-base.postinst new file mode 120000 index 00000000000..42fbd368922 --- /dev/null +++ b/debian/clickhouse-server-base.postinst @@ -0,0 +1 @@ +clickhouse-server.postinst \ No newline at end of file diff --git a/debian/clickhouse-server-base.preinst b/debian/clickhouse-server-base.preinst new file mode 120000 index 00000000000..dbc74e163bf --- /dev/null +++ b/debian/clickhouse-server-base.preinst @@ -0,0 +1 @@ +clickhouse-server.preinst \ No newline at end of file diff --git a/debian/clickhouse-server-base.prerm b/debian/clickhouse-server-base.prerm new file mode 120000 index 00000000000..03f62e02475 --- /dev/null +++ b/debian/clickhouse-server-base.prerm @@ -0,0 +1 @@ +clickhouse-server.prerm \ No newline at end of file diff --git a/debian/clickhouse-server-base.service b/debian/clickhouse-server-base.service new file mode 120000 index 00000000000..b00af30916c --- /dev/null +++ b/debian/clickhouse-server-base.service @@ -0,0 +1 @@ +clickhouse-server.service \ No newline at end of file diff --git a/debian/clickhouse-server-common.install b/debian/clickhouse-server-common.install new file mode 100644 index 00000000000..7237e9914d5 --- /dev/null +++ b/debian/clickhouse-server-common.install @@ -0,0 +1,2 @@ +etc/clickhouse-server/config.xml etc/clickhouse-server +etc/clickhouse-server/users.xml etc/clickhouse-server diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index 6946f8b2728..3476ef665ef 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -1,10 +1,10 @@ #!/bin/sh set -e -CLICKHOUSE_USER=clickhouse -CLICKHOUSE_GROUP=${CLICKHOUSE_USER} -CLICKHOUSE_DATADIR=/var/lib/clickhouse -CLICKHOUSE_LOGDIR=/var/log/clickhouse-server +CLICKHOUSE_USER=${CLICKHOUSE_USER=clickhouse} +CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP=${CLICKHOUSE_USER}} +CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse} +CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server} test -f /etc/default/clickhouse && . 
/etc/default/clickhouse diff --git a/debian/control b/debian/control index 8f57ae258f4..1b3f4656ecb 100644 --- a/debian/control +++ b/debian/control @@ -66,3 +66,17 @@ Priority: extra Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-client, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl Description: Clickhouse tests + + +# TODO: Remove: + +Package: clickhouse-server-base +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends}, adduser, tzdata +Description: DEPRECATED PACKAGE: Server binary for clickhouse + + +Package: clickhouse-server-common +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends}, clickhouse-server-base (= ${binary:Version}) +Description: DEPRECATED PACKAGE: Common configuration files for clickhouse-server-base package diff --git a/debian/rules b/debian/rules index fb21adf9984..245a148ff89 100755 --- a/debian/rules +++ b/debian/rules @@ -97,6 +97,11 @@ override_dh_install: touch $(DESTDIR)/etc/clickhouse-server/metrika/config.xml touch $(DESTDIR)/etc/clickhouse-server/metrika/users.xml + # todo: remove after removing clickhouse-server-base package: + mkdir -p $(DESTDIR)/etc/init.d $(DESTDIR)/etc/cron.d + cp debian/clickhouse-server.init $(DESTDIR)/etc/init.d/clickhouse-server + cp debian/clickhouse-server.cron.d $(DESTDIR)/etc/cron.d/clickhouse-server + dh_install --list-missing --sourcedir=$(DESTDIR) override_dh_auto_install: diff --git a/release b/release index ae19a8ada46..e2ff2579dde 100755 --- a/release +++ b/release @@ -43,7 +43,7 @@ do shift elif [[ $1 == '--fast' ]]; then # Wrong but fast pbuilder mode: create base package with all depends - EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake gcc-7 g++-7 libc6-dev libmariadbclient-dev libicu-dev libltdl-dev libreadline-dev libssl-dev unixodbc-dev psmisc bash expect python python-lxml python-termcolor curl perl sudo openssl" + EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libmariadbclient-dev libicu-dev libltdl-dev libreadline-dev libssl-dev unixodbc-dev psmisc bash expect python python-lxml python-termcolor curl perl sudo openssl" shift else echo "Unknown option $1" From 2a749ccffa6f5e1e12cb00e76dccfff8b9679d21 Mon Sep 17 00:00:00 2001 From: robot-metrika-test Date: Fri, 6 Apr 2018 23:52:52 +0300 Subject: [PATCH 014/192] Auto version update to [54375] --- dbms/cmake/version.cmake | 4 ++-- debian/changelog | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 62d9d47d8d8..eeaff159b27 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,6 +1,6 @@ # This strings autochanged from release_lib.sh: -set(VERSION_DESCRIBE v1.1.54374-testing) -set(VERSION_REVISION 54374) +set(VERSION_DESCRIBE v1.1.54375-testing) +set(VERSION_REVISION 54375) # end of autochange set (VERSION_MAJOR 1) diff --git a/debian/changelog b/debian/changelog index 68694a3522d..675f47672a2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (1.1.54374) unstable; urgency=low +clickhouse (1.1.54375) unstable; urgency=low * Modified source code - -- Thu, 05 Apr 2018 21:26:54 +0300 + -- Fri, 06 Apr 2018 23:52:52 +0300 From 40a88598031e34aaddd700d95de5c822452a3356 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Apr 2018 00:46:57 +0300 Subject: [PATCH 015/192] ReplicatedMergeTree: Better diagnostics [#CLICKHOUSE-2] --- .../MergeTree/ReplicatedMergeTreeAlterThread.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeCleanupThread.cpp 
| 2 +- .../ReplicatedMergeTreePartCheckThread.cpp | 4 ++-- .../ReplicatedMergeTreeRestartingThread.cpp | 6 +++--- dbms/src/Storages/StorageReplicatedMergeTree.cpp | 14 +++++++------- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp index 3b9099f23eb..bc6f58f698a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.cpp @@ -190,7 +190,7 @@ void ReplicatedMergeTreeAlterThread::run() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); force_recheck_parts = true; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 7259b91ddd7..9ef2618ebc8 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -38,7 +38,7 @@ void ReplicatedMergeTreeCleanupThread::run() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); } storage.cleanup_thread_event.tryWait(CLEANUP_SLEEP_MS); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 85e58f4551b..6dbf462952a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -265,7 +265,7 @@ void ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) { /// TODO Better to check error code. - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); LOG_ERROR(log, "Part " << part_name << " looks broken. Removing it and queueing a fetch."); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecksFailed); @@ -383,7 +383,7 @@ void ReplicatedMergeTreePartCheckThread::run() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); wakeup_event.tryWait(PART_CHECK_ERROR_SLEEP_MS); } } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 1cd958c60d6..5affd77ac7b 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -92,7 +92,7 @@ void ReplicatedMergeTreeRestartingThread::run() catch (const zkutil::KeeperException & e) { /// The exception when you try to zookeeper_init usually happens if DNS does not work. We will try to do it again. - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); if (first_time) storage.startup_event.set(); @@ -157,7 +157,7 @@ void ReplicatedMergeTreeRestartingThread::run() catch (...) { storage.startup_event.set(); - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); } wakeup_event.tryWait(check_period_ms); @@ -180,7 +180,7 @@ void ReplicatedMergeTreeRestartingThread::run() } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); } LOG_DEBUG(log, "Restarting thread finished"); diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 8273ef5cb8f..5bbd681a8bd 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -240,7 +240,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( /// Failed to connect to ZK (this became known when trying to perform the first operation). if (e.code == ZooKeeperImpl::ZooKeeper::ZCONNECTIONLOSS) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); current_zookeeper = nullptr; } else @@ -1474,7 +1474,7 @@ bool StorageReplicatedMergeTree::executeFetch(const StorageReplicatedMergeTree:: } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); } throw; @@ -1612,7 +1612,7 @@ void StorageReplicatedMergeTree::queueUpdatingThread() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); queue_updating_event->tryWait(QUEUE_UPDATE_ERROR_SLEEP_MS); } } @@ -1632,7 +1632,7 @@ bool StorageReplicatedMergeTree::queueTask() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); } LogEntryPtr & entry = selected.first; @@ -1666,7 +1666,7 @@ bool StorageReplicatedMergeTree::queueTask() LOG_INFO(log, e.displayText()); } else - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); /** This exception will be written to the queue element, and it can be looked up using `system.replication_queue` table. * The thread that performs this action will sleep a few seconds after the exception. @@ -1676,7 +1676,7 @@ bool StorageReplicatedMergeTree::queueTask() } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); throw; } }); @@ -1935,7 +1935,7 @@ void StorageReplicatedMergeTree::mergeSelectingThread() } catch (...) 
{ - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log, __PRETTY_FUNCTION__); } if (!is_leader) From 7512754ae50701fda8169b4cbff4345b40c30533 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Apr 2018 04:46:50 +0300 Subject: [PATCH 016/192] Improved code after introduction of method "getHeader" in every stream [#CLICKHOUSE-2] --- .../AggregatingSortedBlockInputStream.cpp | 68 ++-- .../AggregatingSortedBlockInputStream.h | 6 +- .../CollapsingSortedBlockInputStream.cpp | 9 +- .../CollapsingSortedBlockInputStream.h | 7 +- .../GraphiteRollupSortedBlockInputStream.cpp | 50 +-- .../GraphiteRollupSortedBlockInputStream.h | 8 +- .../MergeSortingBlockInputStream.cpp | 19 +- .../MergeSortingBlockInputStream.h | 7 +- .../MergingSortedBlockInputStream.cpp | 39 +-- .../MergingSortedBlockInputStream.h | 12 +- .../ReplacingSortedBlockInputStream.cpp | 11 +- .../ReplacingSortedBlockInputStream.h | 11 +- .../SummingSortedBlockInputStream.cpp | 324 +++++++++--------- .../SummingSortedBlockInputStream.h | 9 +- ...sionedCollapsingSortedBlockInputStream.cpp | 26 +- ...ersionedCollapsingSortedBlockInputStream.h | 16 +- 16 files changed, 291 insertions(+), 331 deletions(-) diff --git a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.cpp index 0f27cfdb2ca..7b431e206e9 100644 --- a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.cpp @@ -12,15 +12,46 @@ namespace ErrorCodes } +AggregatingSortedBlockInputStream::AggregatingSortedBlockInputStream( + const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_) + : MergingSortedBlockInputStream(inputs_, description_, max_block_size_) +{ + /// Fill in the column numbers that need to be aggregated. + for (size_t i = 0; i < num_columns; ++i) + { + ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// We leave only states of aggregate functions. + if (!startsWith(column.type->getName(), "AggregateFunction")) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + /// Included into PK? + SortDescription::const_iterator it = description.begin(); + for (; it != description.end(); ++it) + if (it->column_name == column.name || (it->column_name.empty() && it->column_number == i)) + break; + + if (it != description.end()) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + column_numbers_to_aggregate.push_back(i); + } +} + + Block AggregatingSortedBlockInputStream::readImpl() { if (finished) return Block(); - Block header; MutableColumns merged_columns; - - init(header, merged_columns); + init(merged_columns); if (has_collation) throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); @@ -28,37 +59,6 @@ Block AggregatingSortedBlockInputStream::readImpl() if (merged_columns.empty()) return Block(); - /// Additional initialization. - if (next_key.empty()) - { - /// Fill in the column numbers that need to be aggregated. - for (size_t i = 0; i < num_columns; ++i) - { - ColumnWithTypeAndName & column = header.safeGetByPosition(i); - - /// We leave only states of aggregate functions. - if (!startsWith(column.type->getName(), "AggregateFunction")) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - /// Included into PK? 
-            SortDescription::const_iterator it = description.begin();
-            for (; it != description.end(); ++it)
-                if (it->column_name == column.name || (it->column_name.empty() && it->column_number == i))
-                    break;
-
-            if (it != description.end())
-            {
-                column_numbers_not_to_aggregate.push_back(i);
-                continue;
-            }
-
-            column_numbers_to_aggregate.push_back(i);
-        }
-    }
-
     columns_to_aggregate.resize(column_numbers_to_aggregate.size());
     for (size_t i = 0, size = columns_to_aggregate.size(); i < size; ++i)
         columns_to_aggregate[i] = typeid_cast<ColumnAggregateFunction *>(merged_columns[column_numbers_to_aggregate[i]].get());
diff --git a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
index e428b3b7e20..5047158aa2d 100644
--- a/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
+++ b/dbms/src/DataStreams/AggregatingSortedBlockInputStream.h
@@ -21,10 +21,8 @@ namespace DB
 class AggregatingSortedBlockInputStream : public MergingSortedBlockInputStream
 {
 public:
-    AggregatingSortedBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_, size_t max_block_size_)
-        : MergingSortedBlockInputStream(inputs_, description_, max_block_size_)
-    {
-    }
+    AggregatingSortedBlockInputStream(
+        const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_);
 
     String getName() const override { return "AggregatingSorted"; }
 
diff --git a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp
index 9b70bd6b89a..01127b5029b 100644
--- a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.cpp
@@ -108,10 +108,8 @@ Block CollapsingSortedBlockInputStream::readImpl()
     if (finished)
         return {};
 
-    Block header;
     MutableColumns merged_columns;
-
-    init(header, merged_columns);
+    init(merged_columns);
 
     if (has_collation)
         throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR);
 
     if (merged_columns.empty())
         return {};
 
-    /// Additional initialization.
- if (first_negative.empty()) - sign_column_number = header.getPositionByName(sign_column); - - merge(merged_columns, queue); return header.cloneWithColumns(std::move(merged_columns)); } diff --git a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h index 7280dda02b1..e8650b4efc5 100644 --- a/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/CollapsingSortedBlockInputStream.h @@ -25,10 +25,10 @@ class CollapsingSortedBlockInputStream : public MergingSortedBlockInputStream public: CollapsingSortedBlockInputStream( BlockInputStreams inputs_, const SortDescription & description_, - const String & sign_column_, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr) + const String & sign_column, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr) : MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_) - , sign_column(sign_column_) { + sign_column_number = header.getPositionByName(sign_column); } String getName() const override { return "CollapsingSorted"; } @@ -38,8 +38,7 @@ protected: Block readImpl() override; private: - String sign_column; - size_t sign_column_number = 0; + size_t sign_column_number; Logger * log = &Logger::get("CollapsingSortedBlockInputStream"); diff --git a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp index 5da53d8eea5..0a2273d45a9 100644 --- a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.cpp @@ -12,6 +12,31 @@ namespace ErrorCodes } +GraphiteRollupSortedBlockInputStream::GraphiteRollupSortedBlockInputStream( + const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_, + const Graphite::Params & params, time_t time_of_merge) + : MergingSortedBlockInputStream(inputs_, description_, max_block_size_), + params(params), time_of_merge(time_of_merge) +{ + size_t max_size_of_aggregate_state = 0; + for (const auto & pattern : params.patterns) + if (pattern.function->sizeOfData() > max_size_of_aggregate_state) + max_size_of_aggregate_state = pattern.function->sizeOfData(); + + place_for_aggregate_state.resize(max_size_of_aggregate_state); + + /// Memoize column numbers in block. + path_column_num = header.getPositionByName(params.path_column_name); + time_column_num = header.getPositionByName(params.time_column_name); + value_column_num = header.getPositionByName(params.value_column_name); + version_column_num = header.getPositionByName(params.version_column_name); + + for (size_t i = 0; i < num_columns; ++i) + if (i != time_column_num && i != value_column_num && i != version_column_num) + unmodified_column_numbers.push_back(i); +} + + const Graphite::Pattern * GraphiteRollupSortedBlockInputStream::selectPatternForPath(StringRef path) const { for (const auto & pattern : params.patterns) @@ -68,10 +93,8 @@ Block GraphiteRollupSortedBlockInputStream::readImpl() if (finished) return Block(); - Block header; MutableColumns merged_columns; - - init(header, merged_columns); + init(merged_columns); if (has_collation) throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); @@ -79,27 +102,6 @@ Block GraphiteRollupSortedBlockInputStream::readImpl() if (merged_columns.empty()) return Block(); - /// Additional initialization. 
- if (is_first) - { - size_t max_size_of_aggregate_state = 0; - for (const auto & pattern : params.patterns) - if (pattern.function->sizeOfData() > max_size_of_aggregate_state) - max_size_of_aggregate_state = pattern.function->sizeOfData(); - - place_for_aggregate_state.resize(max_size_of_aggregate_state); - - /// Memoize column numbers in block. - path_column_num = header.getPositionByName(params.path_column_name); - time_column_num = header.getPositionByName(params.time_column_name); - value_column_num = header.getPositionByName(params.value_column_name); - version_column_num = header.getPositionByName(params.version_column_name); - - for (size_t i = 0; i < num_columns; ++i) - if (i != time_column_num && i != value_column_num && i != version_column_num) - unmodified_column_numbers.push_back(i); - } - merge(merged_columns, queue); return header.cloneWithColumns(std::move(merged_columns)); } diff --git a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.h b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.h index c256d27064d..15dfe7c0f4d 100644 --- a/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.h +++ b/dbms/src/DataStreams/GraphiteRollupSortedBlockInputStream.h @@ -126,12 +126,8 @@ class GraphiteRollupSortedBlockInputStream : public MergingSortedBlockInputStrea { public: GraphiteRollupSortedBlockInputStream( - BlockInputStreams inputs_, const SortDescription & description_, size_t max_block_size_, - const Graphite::Params & params, time_t time_of_merge) - : MergingSortedBlockInputStream(inputs_, description_, max_block_size_), - params(params), time_of_merge(time_of_merge) - { - } + const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_, + const Graphite::Params & params, time_t time_of_merge); String getName() const override { return "GraphiteRollupSorted"; } diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index 628de41b32e..abfcdc89698 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -63,14 +63,21 @@ static void enrichBlockWithConstants(Block & block, const Block & header) } +MergeSortingBlockInputStream::MergeSortingBlockInputStream( + const BlockInputStreamPtr & input, SortDescription & description_, + size_t max_merged_block_size_, size_t limit_, + size_t max_bytes_before_external_sort_, const std::string & tmp_path_) + : description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_), + max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_) +{ + children.push_back(input); + header = getHeader(); + removeConstantsFromSortDescription(header, description); +} + + Block MergeSortingBlockInputStream::readImpl() { - if (!header) - { - header = getHeader(); - removeConstantsFromSortDescription(header, description); - } - /** Algorithm: * - read to memory blocks from source stream; * - if too many of them and if external sorting is enabled, diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index 416dc0ecce7..498837f3bff 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -73,12 +73,7 @@ public: /// limit - if not 0, allowed to return just first 'limit' rows in sorted order. 
MergeSortingBlockInputStream(const BlockInputStreamPtr & input, SortDescription & description_, size_t max_merged_block_size_, size_t limit_, - size_t max_bytes_before_external_sort_, const std::string & tmp_path_) - : description(description_), max_merged_block_size(max_merged_block_size_), limit(limit_), - max_bytes_before_external_sort(max_bytes_before_external_sort_), tmp_path(tmp_path_) - { - children.push_back(input); - } + size_t max_bytes_before_external_sort_, const std::string & tmp_path_); String getName() const override { return "MergeSorting"; } diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp index be90a00e4b9..62b32330679 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.cpp @@ -15,15 +15,17 @@ namespace ErrorCodes MergingSortedBlockInputStream::MergingSortedBlockInputStream( - BlockInputStreams & inputs_, const SortDescription & description_, - size_t max_block_size_, size_t limit_, WriteBuffer * out_row_sources_buf_, bool quiet_) + const BlockInputStreams & inputs_, const SortDescription & description_, + size_t max_block_size_, size_t limit_, WriteBuffer * out_row_sources_buf_, bool quiet_) : description(description_), max_block_size(max_block_size_), limit(limit_), quiet(quiet_) , source_blocks(inputs_.size()), cursors(inputs_.size()), out_row_sources_buf(out_row_sources_buf_) { children.insert(children.end(), inputs_.begin(), inputs_.end()); + header = children.at(0)->getHeader(); + num_columns = header.columns(); } -void MergingSortedBlockInputStream::init(Block & header, MutableColumns & merged_columns) +void MergingSortedBlockInputStream::init(MutableColumns & merged_columns) { /// Read the first blocks, initialize the queue. if (first) @@ -44,9 +46,6 @@ void MergingSortedBlockInputStream::init(Block & header, MutableColumns & merged if (rows == 0) continue; - if (!num_columns) - num_columns = shared_block_ptr->columns(); - if (expected_block_size < rows) expected_block_size = std::min(rows, max_block_size); @@ -62,32 +61,9 @@ void MergingSortedBlockInputStream::init(Block & header, MutableColumns & merged initQueue(queue); } - /// Initialize the result. - - /// We clone the structure of the first non-empty source block. - { - auto it = source_blocks.cbegin(); - for (; it != source_blocks.cend(); ++it) - { - const SharedBlockPtr & shared_block_ptr = *it; - - if (*shared_block_ptr) - { - header = shared_block_ptr->cloneEmpty(); - break; - } - } - - /// If all the input blocks are empty. - if (it == source_blocks.cend()) - return; - } - /// Let's check that all source blocks have the same structure. 
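/// What "the same structure" means for the check below, sketched as a free
/// function over a simplified block type: equal column count and pairwise-equal
/// names and types. An illustration of the invariant being asserted, assuming a
/// simplified Block, not the actual ClickHouse comparison:

#include <cstddef>
#include <string>
#include <vector>

struct SimpleColumn { std::string name; std::string type_name; };
using SimpleBlock = std::vector<SimpleColumn>;

bool haveEqualStructure(const SimpleBlock & lhs, const SimpleBlock & rhs)
{
    if (lhs.size() != rhs.size())
        return false;
    for (size_t i = 0; i < lhs.size(); ++i)
        if (lhs[i].name != rhs[i].name || lhs[i].type_name != rhs[i].type_name)
            return false;
    return true;    /// every merged source must pass this against the header
}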
- for (auto it = source_blocks.cbegin(); it != source_blocks.cend(); ++it) + for (const SharedBlockPtr & shared_block_ptr : source_blocks) { - const SharedBlockPtr & shared_block_ptr = *it; - if (!*shared_block_ptr) continue; @@ -120,10 +96,9 @@ Block MergingSortedBlockInputStream::readImpl() if (children.size() == 1) return children[0]->read(); - Block header; MutableColumns merged_columns; - init(header, merged_columns); + init(merged_columns); if (merged_columns.empty()) return {}; diff --git a/dbms/src/DataStreams/MergingSortedBlockInputStream.h b/dbms/src/DataStreams/MergingSortedBlockInputStream.h index 6391f52dcd5..825a9e1fcc3 100644 --- a/dbms/src/DataStreams/MergingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/MergingSortedBlockInputStream.h @@ -65,8 +65,8 @@ public: * quiet - don't log profiling info */ MergingSortedBlockInputStream( - BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_, - size_t limit_ = 0, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false); + const BlockInputStreams & inputs_, const SortDescription & description_, size_t max_block_size_, + size_t limit_ = 0, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false); String getName() const override { return "MergingSorted"; } @@ -74,7 +74,7 @@ public: bool isSortedOutput() const override { return true; } const SortDescription & getSortDescription() const override { return description; } - Block getHeader() const override { return children.at(0)->getHeader(); } + Block getHeader() const override { return header; } protected: struct RowRef @@ -120,14 +120,16 @@ protected: void readSuffixImpl() override; - /// Initializes the queue and the next result block. - void init(Block & header, MutableColumns & merged_columns); + /// Initializes the queue and the columns of next result block. + void init(MutableColumns & merged_columns); /// Gets the next block from the source corresponding to the `current`. template void fetchNextBlock(const TSortCursor & current, std::priority_queue & queue); + Block header; + const SortDescription description; const size_t max_block_size; size_t limit; diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp index 553e3a01e4b..8fcfdfe2d58 100644 --- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.cpp @@ -35,10 +35,8 @@ Block ReplacingSortedBlockInputStream::readImpl() if (finished) return Block(); - Block header; MutableColumns merged_columns; - - init(header, merged_columns); + init(merged_columns); if (has_collation) throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); @@ -46,13 +44,6 @@ Block ReplacingSortedBlockInputStream::readImpl() if (merged_columns.empty()) return Block(); - /// Additional initialization. 
- if (selected_row.empty()) - { - if (!version_column.empty()) - version_column_number = header.getPositionByName(version_column); - } - merge(merged_columns, queue); return header.cloneWithColumns(std::move(merged_columns)); } diff --git a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h index b8592a0e5b6..d0a7594c69a 100644 --- a/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/ReplacingSortedBlockInputStream.h @@ -15,11 +15,13 @@ namespace DB class ReplacingSortedBlockInputStream : public MergingSortedBlockInputStream { public: - ReplacingSortedBlockInputStream(BlockInputStreams inputs_, const SortDescription & description_, - const String & version_column_, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr) - : MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_), - version_column(version_column_) + ReplacingSortedBlockInputStream( + const BlockInputStreams & inputs_, const SortDescription & description_, + const String & version_column, size_t max_block_size_, WriteBuffer * out_row_sources_buf_ = nullptr) + : MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_) { + if (!version_column.empty()) + version_column_number = header.getPositionByName(version_column); } String getName() const override { return "ReplacingSorted"; } @@ -29,7 +31,6 @@ protected: Block readImpl() override; private: - String version_column; ssize_t version_column_number = -1; Logger * log = &Logger::get("ReplacingSortedBlockInputStream"); diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp index e79366ca02d..e914b8f8b65 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.cpp @@ -24,6 +24,168 @@ namespace ErrorCodes } +namespace +{ + bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) + { + for (auto & desc : description) + if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) + return true; + + return false; + } +} + + +SummingSortedBlockInputStream::SummingSortedBlockInputStream( + const BlockInputStreams & inputs_, + const SortDescription & description_, + /// List of columns to be summed. If empty, all numeric columns that are not in the description are taken. + const Names & column_names_to_sum, + size_t max_block_size_) + : MergingSortedBlockInputStream(inputs_, description_, max_block_size_) +{ + current_row.resize(num_columns); + + /// name of nested structure -> the column numbers that refer to it. + std::unordered_map> discovered_maps; + + /** Fill in the column numbers, which must be summed. + * This can only be numeric columns that are not part of the sort key. + * If a non-empty column_names_to_sum is specified, then we only take these columns. + * Some columns from column_names_to_sum may not be found. This is ignored. 
+ */ + for (size_t i = 0; i < num_columns; ++i) + { + const ColumnWithTypeAndName & column = header.safeGetByPosition(i); + + /// Discover nested Maps and find columns for summation + if (typeid_cast(column.type.get())) + { + const auto map_name = Nested::extractTableName(column.name); + /// if nested table name ends with `Map` it is a possible candidate for special handling + if (map_name == column.name || !endsWith(map_name, "Map")) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + discovered_maps[map_name].emplace_back(i); + } + else + { + if (!column.type->isSummable()) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + /// Are they inside the PK? + if (isInPrimaryKey(description, column.name, i)) + { + column_numbers_not_to_aggregate.push_back(i); + continue; + } + + if (column_names_to_sum.empty() + || column_names_to_sum.end() != + std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) + { + // Create aggregator to sum this column + AggregateDescription desc; + desc.column_numbers = {i}; + desc.init("sumWithOverflow", {column.type}); + columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Column is not going to be summed, use last value + column_numbers_not_to_aggregate.push_back(i); + } + } + } + + /// select actual nested Maps from list of candidates + for (const auto & map : discovered_maps) + { + /// map should contain at least two elements (key -> value) + if (map.second.size() < 2) + { + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + continue; + } + + /// no elements of map could be in primary key + auto column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) + break; + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + continue; + } + + DataTypes argument_types; + AggregateDescription desc; + MapDescription map_desc; + + column_num_it = map.second.begin(); + for (; column_num_it != map.second.end(); ++column_num_it) + { + const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); + const String & name = key_col.name; + const IDataType & nested_type = *static_cast(key_col.type.get())->getNestedType(); + + if (column_num_it == map.second.begin() + || endsWith(name, "ID") + || endsWith(name, "Key") + || endsWith(name, "Type")) + { + if (!nested_type.isValueRepresentedByInteger()) + break; + + map_desc.key_col_nums.push_back(*column_num_it); + } + else + { + if (!nested_type.isSummable()) + break; + + map_desc.val_col_nums.push_back(*column_num_it); + } + + // Add column to function arguments + desc.column_numbers.push_back(*column_num_it); + argument_types.push_back(key_col.type); + } + + if (column_num_it != map.second.end()) + { + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + continue; + } + + if (map_desc.key_col_nums.size() == 1) + { + // Create summation for all value columns in the map + desc.init("sumMap", argument_types); + columns_to_aggregate.emplace_back(std::move(desc)); + } + else + { + // Fall back to legacy mergeMaps for composite keys + for (auto col : map.second) + column_numbers_not_to_aggregate.push_back(col); + maps_to_sum.emplace_back(std::move(map_desc)); + } + } +} + + void SummingSortedBlockInputStream::insertCurrentRowIfNeeded(MutableColumns & merged_columns, bool 
force_insertion) { for (auto & desc : columns_to_aggregate) @@ -78,28 +240,13 @@ void SummingSortedBlockInputStream::insertCurrentRowIfNeeded(MutableColumns & me } -namespace -{ - bool isInPrimaryKey(const SortDescription & description, const std::string & name, const size_t number) - { - for (auto & desc : description) - if (desc.column_name == name || (desc.column_name.empty() && desc.column_number == number)) - return true; - - return false; - } -} - - Block SummingSortedBlockInputStream::readImpl() { if (finished) return Block(); - Block header; MutableColumns merged_columns; - - init(header, merged_columns); + init(merged_columns); if (has_collation) throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::LOGICAL_ERROR); @@ -107,150 +254,7 @@ Block SummingSortedBlockInputStream::readImpl() if (merged_columns.empty()) return {}; - /// Additional initialization. - if (current_row.empty()) - { - current_row.resize(num_columns); - - /// name of nested structure -> the column numbers that refer to it. - std::unordered_map> discovered_maps; - - /** Fill in the column numbers, which must be summed. - * This can only be numeric columns that are not part of the sort key. - * If a non-empty column_names_to_sum is specified, then we only take these columns. - * Some columns from column_names_to_sum may not be found. This is ignored. - */ - for (size_t i = 0; i < num_columns; ++i) - { - const ColumnWithTypeAndName & column = header.safeGetByPosition(i); - - /// Discover nested Maps and find columns for summation - if (typeid_cast(column.type.get())) - { - const auto map_name = Nested::extractTableName(column.name); - /// if nested table name ends with `Map` it is a possible candidate for special handling - if (map_name == column.name || !endsWith(map_name, "Map")) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - discovered_maps[map_name].emplace_back(i); - } - else - { - if (!column.type->isSummable()) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - /// Are they inside the PK? 
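/// How the constructor above classifies a flattened Nested "...Map" structure,
/// sketched over plain strings: a column named "StatsMap.key" belongs to nested
/// table "StatsMap", and only tables whose name ends in "Map" (and that keep at
/// least one key and one value column) become sumMap candidates. Illustrative
/// only -- the real logic also checks types and primary-key membership, and
/// "StatsMap" here is a made-up column name:

#include <iostream>
#include <string>
#include <vector>

static std::string extractNestedTableName(const std::string & column_name)
{
    auto pos = column_name.find('.');
    return pos == std::string::npos ? column_name : column_name.substr(0, pos);
}

static bool endsWithStr(const std::string & s, const std::string & suffix)
{
    return s.size() >= suffix.size()
        && s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0;
}

int main()
{
    std::vector<std::string> columns = {"StatsMap.key", "StatsMap.value", "visits"};
    for (const auto & name : columns)
    {
        std::string table = extractNestedTableName(name);
        bool candidate = table != name && endsWithStr(table, "Map");
        std::cout << name << " -> "
                  << (candidate ? "nested Map candidate" : "ordinary column") << '\n';
    }
}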
- if (isInPrimaryKey(description, column.name, i)) - { - column_numbers_not_to_aggregate.push_back(i); - continue; - } - - if (column_names_to_sum.empty() - || column_names_to_sum.end() != - std::find(column_names_to_sum.begin(), column_names_to_sum.end(), column.name)) - { - // Create aggregator to sum this column - AggregateDescription desc; - desc.column_numbers = {i}; - desc.init("sumWithOverflow", {column.type}); - columns_to_aggregate.emplace_back(std::move(desc)); - } - else - { - // Column is not going to be summed, use last value - column_numbers_not_to_aggregate.push_back(i); - } - } - } - - /// select actual nested Maps from list of candidates - for (const auto & map : discovered_maps) - { - /// map should contain at least two elements (key -> value) - if (map.second.size() < 2) - { - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - continue; - } - - /// no elements of map could be in primary key - auto column_num_it = map.second.begin(); - for (; column_num_it != map.second.end(); ++column_num_it) - if (isInPrimaryKey(description, header.safeGetByPosition(*column_num_it).name, *column_num_it)) - break; - if (column_num_it != map.second.end()) - { - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - continue; - } - - DataTypes argument_types = {}; - AggregateDescription desc; - MapDescription map_desc; - - column_num_it = map.second.begin(); - for (; column_num_it != map.second.end(); ++column_num_it) - { - const ColumnWithTypeAndName & key_col = header.safeGetByPosition(*column_num_it); - const String & name = key_col.name; - const IDataType & nested_type = *static_cast(key_col.type.get())->getNestedType(); - - if (column_num_it == map.second.begin() - || endsWith(name, "ID") - || endsWith(name, "Key") - || endsWith(name, "Type")) - { - if (!nested_type.isValueRepresentedByInteger()) - break; - - map_desc.key_col_nums.push_back(*column_num_it); - } - else - { - if (!nested_type.isSummable()) - break; - - map_desc.val_col_nums.push_back(*column_num_it); - } - - // Add column to function arguments - desc.column_numbers.push_back(*column_num_it); - argument_types.push_back(key_col.type); - } - - if (column_num_it != map.second.end()) - { - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - continue; - } - - if (map_desc.key_col_nums.size() == 1) - { - // Create summation for all value columns in the map - desc.init("sumMap", argument_types); - columns_to_aggregate.emplace_back(std::move(desc)); - } - else - { - // Fall back to legacy mergeMaps for composite keys - for (auto col : map.second) - column_numbers_not_to_aggregate.push_back(col); - maps_to_sum.emplace_back(std::move(map_desc)); - } - } - } - - // Update aggregation result columns for current block + /// Update aggregation result columns for current block for (auto & desc : columns_to_aggregate) { // Wrap aggregated columns in a tuple to match function signature diff --git a/dbms/src/DataStreams/SummingSortedBlockInputStream.h b/dbms/src/DataStreams/SummingSortedBlockInputStream.h index 62df3863fc6..78b61903d01 100644 --- a/dbms/src/DataStreams/SummingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/SummingSortedBlockInputStream.h @@ -24,14 +24,12 @@ namespace ErrorCodes class SummingSortedBlockInputStream : public MergingSortedBlockInputStream { public: - SummingSortedBlockInputStream(BlockInputStreams inputs_, + SummingSortedBlockInputStream( + const BlockInputStreams & inputs_, const SortDescription & description_, /// List of 
columns to be summed. If empty, all numeric columns that are not in the description are taken. const Names & column_names_to_sum_, - size_t max_block_size_) - : MergingSortedBlockInputStream(inputs_, description_, max_block_size_), column_names_to_sum(column_names_to_sum_) - { - } + size_t max_block_size_); String getName() const override { return "SummingSorted"; } @@ -46,7 +44,6 @@ private: bool finished = false; /// Columns with which values should be summed. - Names column_names_to_sum; /// If set, it is converted to column_numbers_to_aggregate when initialized. ColumnNumbers column_numbers_not_to_aggregate; /** A table can have nested tables that are treated in a special way. diff --git a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.cpp b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.cpp index 45c529470c0..071752137c6 100644 --- a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.cpp +++ b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.cpp @@ -2,6 +2,7 @@ #include #include + namespace DB { @@ -11,6 +12,20 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } + +VersionedCollapsingSortedBlockInputStream::VersionedCollapsingSortedBlockInputStream( + const BlockInputStreams & inputs_, const SortDescription & description_, + const String & sign_column_, size_t max_block_size_, bool can_collapse_all_rows_, + WriteBuffer * out_row_sources_buf_) + : MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_) + , max_rows_in_queue(std::min(std::max(3, max_block_size_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 2) + , current_keys(max_rows_in_queue + 1), can_collapse_all_rows(can_collapse_all_rows_) +{ + sign_column_number = header.getPositionByName(sign_column_); +} + + + inline ALWAYS_INLINE static void writeRowSourcePart(WriteBuffer & buffer, RowSourcePart row_source) { if constexpr (sizeof(RowSourcePart) == 1) @@ -52,12 +67,8 @@ Block VersionedCollapsingSortedBlockInputStream::readImpl() if (finished) return {}; - Block header; MutableColumns merged_columns; - - bool is_initialized = !first; - - init(header, merged_columns); + init(merged_columns); if (has_collation) throw Exception("Logical error: " + getName() + " does not support collations", ErrorCodes::NOT_IMPLEMENTED); @@ -65,11 +76,6 @@ Block VersionedCollapsingSortedBlockInputStream::readImpl() if (merged_columns.empty()) return {}; - /// Additional initialization. - if (!is_initialized) - sign_column_number = header.getPositionByName(sign_column); - - merge(merged_columns, queue); return header.cloneWithColumns(std::move(merged_columns)); } diff --git a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h index 1c299e78e81..636ee5e3833 100644 --- a/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h +++ b/dbms/src/DataStreams/VersionedCollapsingSortedBlockInputStream.h @@ -6,6 +6,7 @@ #include + namespace DB { @@ -16,6 +17,7 @@ namespace ErrorCodes static const size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192; + /* Deque with fixed memory size. Allows pushing gaps. * frontGap() returns the number of gaps were inserted before front. * @@ -173,15 +175,9 @@ public: /// Don't need version column. It's in primary key. /// max_rows_in_queue should be about max_block_size_ if we won't store a lot of extra blocks (RowRef holds SharedBlockPtr). 
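/// The bound referred to above, evaluated for a few block sizes. A standalone
/// sketch of the clamp used in the .cpp: at least 3, at most
/// MAX_ROWS_IN_MULTIVERSION_QUEUE, with 2 slots held back:

#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    const size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192;
    for (size_t max_block_size : {size_t(1), size_t(1024), size_t(65536)})
    {
        size_t max_rows_in_queue =
            std::min(std::max<size_t>(3, max_block_size), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 2;
        std::cout << max_block_size << " -> " << max_rows_in_queue << '\n';
        /// prints: 1 -> 1, 1024 -> 1022, 65536 -> 8190
    }
}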
VersionedCollapsingSortedBlockInputStream( - BlockInputStreams inputs_, const SortDescription & description_, - const String & sign_column_, size_t max_block_size_, bool can_collapse_all_rows_, - WriteBuffer * out_row_sources_buf_ = nullptr) - : MergingSortedBlockInputStream(inputs_, description_, max_block_size_, 0, out_row_sources_buf_) - , sign_column(sign_column_) - , max_rows_in_queue(std::min(std::max(3, max_block_size_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 2) - , current_keys(max_rows_in_queue + 1), can_collapse_all_rows(can_collapse_all_rows_) - { - } + const BlockInputStreams & inputs_, const SortDescription & description_, + const String & sign_column_, size_t max_block_size_, bool can_collapse_all_rows_, + WriteBuffer * out_row_sources_buf_ = nullptr); String getName() const override { return "VersionedCollapsingSorted"; } @@ -190,8 +186,6 @@ protected: Block readImpl() override; private: - String sign_column; - size_t sign_column_number = 0; Logger * log = &Logger::get("VersionedCollapsingSortedBlockInputStream"); From cf227d9858ff0f5ec9c98ad88edbc18822642277 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Apr 2018 05:29:12 +0300 Subject: [PATCH 017/192] Removed excessive library dependencies #2180 --- dbms/CMakeLists.txt | 2 -- dbms/src/Functions/CMakeLists.txt | 4 +++- dbms/src/Server/CMakeLists.txt | 4 ++-- dbms/src/TableFunctions/CMakeLists.txt | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 5e0bc6257fb..906897fd0f4 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -145,8 +145,6 @@ target_link_libraries (dbms clickhouse_common_config clickhouse_common_io ${MYSQLXX_LIBRARY} - ${FARMHASH_LIBRARIES} - ${METROHASH_LIBRARIES} ${RE2_LIBRARY} ${RE2_ST_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY} diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index cf0bf00b075..cbc5288eac5 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -78,7 +78,9 @@ list(REMOVE_ITEM clickhouse_functions_sources IFunction.cpp FunctionFactory.cpp list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h FunctionHelpers.h) add_library(clickhouse_functions ${clickhouse_functions_sources}) -target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing) + +target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES}) + target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash) target_include_directories (clickhouse_functions BEFORE PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src) target_include_directories (clickhouse_functions BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) diff --git a/dbms/src/Server/CMakeLists.txt b/dbms/src/Server/CMakeLists.txt index a153d3c932a..b2d72fb3c8b 100644 --- a/dbms/src/Server/CMakeLists.txt +++ b/dbms/src/Server/CMakeLists.txt @@ -37,7 +37,7 @@ add_library (clickhouse-extract-from-config-lib ${SPLIT_SHARED} ExtractFromConfi target_link_libraries (clickhouse-extract-from-config-lib clickhouse_common_config clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) add_library (clickhouse-client-lib Client.cpp) -target_link_libraries (clickhouse-client-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) +target_link_libraries (clickhouse-client-lib clickhouse_functions 
clickhouse_aggregate_functions ${LINE_EDITING_LIBS} ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_include_directories (clickhouse-client-lib PRIVATE ${READLINE_INCLUDE_DIR}) install (FILES clickhouse-client.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse-client RENAME config.xml) @@ -56,7 +56,7 @@ add_library (clickhouse-format-lib ${SPLIT_SHARED} Format.cpp) target_link_libraries (clickhouse-format-lib dbms clickhouse_common_io ${Boost_PROGRAM_OPTIONS_LIBRARY}) add_library (clickhouse-copier-lib ClusterCopier.cpp) -target_link_libraries (clickhouse-copier-lib clickhouse-server-lib clickhouse_functions clickhouse_aggregate_functions clickhouse_table_functions) +target_link_libraries (clickhouse-copier-lib clickhouse-server-lib clickhouse_functions clickhouse_aggregate_functions) if (USE_EMBEDDED_COMPILER) link_directories (${LLVM_LIBRARY_DIRS}) diff --git a/dbms/src/TableFunctions/CMakeLists.txt b/dbms/src/TableFunctions/CMakeLists.txt index 4708ed9b602..53bfccfa3a2 100644 --- a/dbms/src/TableFunctions/CMakeLists.txt +++ b/dbms/src/TableFunctions/CMakeLists.txt @@ -5,7 +5,7 @@ list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunc list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) -target_link_libraries(clickhouse_table_functions dbms clickhouse_storages_system ${Poco_Foundation_LIBRARY}) +target_link_libraries(clickhouse_table_functions dbms ${Poco_Foundation_LIBRARY}) if (Poco_SQLODBC_FOUND) target_link_libraries (clickhouse_table_functions ${Poco_SQLODBC_LIBRARY}) From b7e5f2d2fa7dc367681cd9a1a74dd6616f97c7ba Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Apr 2018 06:07:26 +0300 Subject: [PATCH 018/192] Moved SessionPoolHelpers #2180 --- .../libpocoext/include/Poco/Ext/SessionPoolHelpers.h | 0 .../libpocoext/src/SessionPoolHelpers.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename dbms/src/Common/PocoSessionPoolHelpers.h => libs/libpocoext/include/Poco/Ext/SessionPoolHelpers.h (100%) rename dbms/src/Common/PocoSessionPoolHelpers.cpp => libs/libpocoext/src/SessionPoolHelpers.cpp (92%) diff --git a/dbms/src/Common/PocoSessionPoolHelpers.h b/libs/libpocoext/include/Poco/Ext/SessionPoolHelpers.h similarity index 100% rename from dbms/src/Common/PocoSessionPoolHelpers.h rename to libs/libpocoext/include/Poco/Ext/SessionPoolHelpers.h diff --git a/dbms/src/Common/PocoSessionPoolHelpers.cpp b/libs/libpocoext/src/SessionPoolHelpers.cpp similarity index 92% rename from dbms/src/Common/PocoSessionPoolHelpers.cpp rename to libs/libpocoext/src/SessionPoolHelpers.cpp index f7fd155cbe9..61c1ace6b96 100644 --- a/dbms/src/Common/PocoSessionPoolHelpers.cpp +++ b/libs/libpocoext/src/SessionPoolHelpers.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include std::shared_ptr createAndCheckResizePocoSessionPool(PocoSessionPoolConstructor pool_constr) From aa77d0b3042bb8c0d49c7d353ea5a7401c6a9802 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Apr 2018 06:07:32 +0300 Subject: [PATCH 019/192] Moved SessionPoolHelpers #2180 --- dbms/src/Dictionaries/ODBCDictionarySource.cpp | 2 +- dbms/src/Storages/StorageODBC.cpp | 2 +- libs/libpocoext/CMakeLists.txt | 9 ++++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/dbms/src/Dictionaries/ODBCDictionarySource.cpp b/dbms/src/Dictionaries/ODBCDictionarySource.cpp index 7db8dbe9e34..489b168b3fa 100644 --- 
a/dbms/src/Dictionaries/ODBCDictionarySource.cpp +++ b/dbms/src/Dictionaries/ODBCDictionarySource.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Storages/StorageODBC.cpp b/dbms/src/Storages/StorageODBC.cpp index 09791c0e314..39b51d46047 100644 --- a/dbms/src/Storages/StorageODBC.cpp +++ b/dbms/src/Storages/StorageODBC.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/libs/libpocoext/CMakeLists.txt b/libs/libpocoext/CMakeLists.txt index 64745f6e8d2..6b2c09b4bf1 100644 --- a/libs/libpocoext/CMakeLists.txt +++ b/libs/libpocoext/CMakeLists.txt @@ -1,9 +1,16 @@ add_library (pocoext ${SPLIT_SHARED} src/LevelFilterChannel.cpp src/ThreadNumber.cpp + src/SessionPoolHelpers.cpp include/Poco/Ext/LevelFilterChannel.h - include/Poco/Ext/ThreadNumber.h) + include/Poco/Ext/ThreadNumber.h + include/Poco/Ext/SessionPoolHelpers.h) + +if (Poco_Data_FOUND) + target_include_directories (pocoext PRIVATE ${Poco_Data_INCLUDE_DIRS}) + target_link_libraries(pocoext ${Poco_Data_LIBRARY}) +endif() target_include_directories (pocoext PUBLIC include PRIVATE ${COMMON_INCLUDE_DIR}) From 5ce62809dae399af5d88122cb2b98414a3d0a7ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Apr 2018 06:46:20 +0300 Subject: [PATCH 020/192] Fixed error [#CLICKHOUSE-2] --- dbms/src/DataStreams/MergeSortingBlockInputStream.cpp | 2 +- dbms/src/DataStreams/MergeSortingBlockInputStream.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp index abfcdc89698..9995dde25f5 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.cpp @@ -140,7 +140,7 @@ Block MergeSortingBlockInputStream::readImpl() /// Create sorted streams to merge. 
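/// The step the loop below feeds: every temporary file holds one sorted run, and
/// the runs are k-way merged back together through a heap. A minimal sketch of
/// that merge over vectors of ints standing in for spilled blocks (illustrative;
/// the real code merges Blocks through MergingSortedBlockInputStream):

#include <functional>
#include <iostream>
#include <queue>
#include <utility>
#include <vector>

int main()
{
    std::vector<std::vector<int>> runs = {{1, 4, 7}, {2, 5}, {0, 3, 6}};

    using Item = std::pair<int, size_t>;    /// (value, index of the run it came from)
    std::priority_queue<Item, std::vector<Item>, std::greater<Item>> heap;

    std::vector<size_t> pos(runs.size(), 0);
    for (size_t r = 0; r < runs.size(); ++r)
        if (!runs[r].empty())
            heap.push({runs[r][0], r});

    while (!heap.empty())    /// pop the global minimum, refill from the same run
    {
        auto [value, r] = heap.top();
        heap.pop();
        std::cout << value << ' ';
        if (++pos[r] < runs[r].size())
            heap.push({runs[r][pos[r]], r});
    }
    std::cout << '\n';    /// prints: 0 1 2 3 4 5 6 7
}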
for (const auto & file : temporary_files) { - temporary_inputs.emplace_back(std::make_unique(file->path())); + temporary_inputs.emplace_back(std::make_unique(file->path(), header)); inputs_to_merge.emplace_back(temporary_inputs.back()->block_in); } diff --git a/dbms/src/DataStreams/MergeSortingBlockInputStream.h b/dbms/src/DataStreams/MergeSortingBlockInputStream.h index 498837f3bff..feb882effb0 100644 --- a/dbms/src/DataStreams/MergeSortingBlockInputStream.h +++ b/dbms/src/DataStreams/MergeSortingBlockInputStream.h @@ -115,8 +115,8 @@ private: CompressedReadBuffer compressed_in; BlockInputStreamPtr block_in; - TemporaryFileStream(const std::string & path) - : file_in(path), compressed_in(file_in), block_in(std::make_shared(compressed_in, 0)) {} + TemporaryFileStream(const std::string & path, const Block & header) + : file_in(path), compressed_in(file_in), block_in(std::make_shared(compressed_in, header, 0)) {} }; std::vector> temporary_inputs; From 9a9c2a1e9d65f4645cf8c70eeed943e3e7777cb0 Mon Sep 17 00:00:00 2001 From: proller Date: Fri, 6 Apr 2018 20:49:49 -0700 Subject: [PATCH 021/192] CLICKHOUSE-3444: show error for old query on replace_running_query (#2127) * Add tests * Fix test * Fix test build * CLICKHOUSE-3444: show error for replace_running_query * fix naming * fix * fix test * Update IProfilingBlockInputStream.cpp --- contrib/zstd | 2 +- dbms/src/DataStreams/IProfilingBlockInputStream.cpp | 2 +- dbms/src/Interpreters/ProcessList.cpp | 2 +- dbms/src/Interpreters/ProcessList.h | 8 ++++---- .../queries/0_stateless/00600_replace_running_query.sh | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/contrib/zstd b/contrib/zstd index 255597502c3..f4340f46b23 160000 --- a/contrib/zstd +++ b/contrib/zstd @@ -1 +1 @@ -Subproject commit 255597502c3a4ef150abc964e376d4202a8c2929 +Subproject commit f4340f46b2387bc8de7d5320c0b83bb1499933ad diff --git a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp index 8cb570bbf62..09eeff2225c 100644 --- a/dbms/src/DataStreams/IProfilingBlockInputStream.cpp +++ b/dbms/src/DataStreams/IProfilingBlockInputStream.cpp @@ -238,7 +238,7 @@ void IProfilingBlockInputStream::progressImpl(const Progress & value) if (process_list_elem) { if (!process_list_elem->updateProgressIn(value)) - cancel(false); + cancel(/* kill */ true); /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers. diff --git a/dbms/src/Interpreters/ProcessList.cpp b/dbms/src/Interpreters/ProcessList.cpp index e6e77c85180..59c481e6e3a 100644 --- a/dbms/src/Interpreters/ProcessList.cpp +++ b/dbms/src/Interpreters/ProcessList.cpp @@ -66,7 +66,7 @@ ProcessList::EntryPtr ProcessList::insert( /// Ask queries to cancel. They will check this flag. for (auto it = range.first; it != range.second; ++it) - it->second->is_cancelled.store(true, std::memory_order_relaxed); + it->second->is_killed.store(true, std::memory_order_relaxed); } } } diff --git a/dbms/src/Interpreters/ProcessList.h b/dbms/src/Interpreters/ProcessList.h index a76e886414d..ecc29d671fe 100644 --- a/dbms/src/Interpreters/ProcessList.h +++ b/dbms/src/Interpreters/ProcessList.h @@ -78,7 +78,7 @@ private: CurrentMetrics::Increment num_queries {CurrentMetrics::Query}; - std::atomic is_cancelled { false }; + std::atomic is_killed { false }; /// Be careful using it. For example, queries field could be modified concurrently. 
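/// The cooperative-cancellation contract behind the is_cancelled -> is_killed
/// rename: KILL QUERY (or replace_running_query) only flips an atomic flag, and
/// the query thread notices it at its next progress update and unwinds itself.
/// A standalone sketch of that handshake, assuming a worker that polls the flag
/// (in the real code the check lives in updateProgressIn/updateProgressOut):

#include <atomic>
#include <chrono>
#include <iostream>
#include <thread>

int main()
{
    std::atomic<bool> is_killed{false};

    std::thread query([&]
    {
        for (int steps = 0; ; ++steps)
        {
            if (is_killed.load(std::memory_order_relaxed))    /// the progress-callback check
            {
                std::cout << "query cancelled after " << steps << " steps\n";
                return;
            }
            std::this_thread::sleep_for(std::chrono::milliseconds(1));
        }
    });

    std::this_thread::sleep_for(std::chrono::milliseconds(20));
    is_killed.store(true, std::memory_order_relaxed);    /// the killer's only action
    query.join();
}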
const ProcessListForUser * user_process_list = nullptr; @@ -140,13 +140,13 @@ public: if (priority_handle) priority_handle->waitIfNeed(std::chrono::seconds(1)); /// NOTE Could make timeout customizable. - return !is_cancelled.load(std::memory_order_relaxed); + return !is_killed.load(std::memory_order_relaxed); } bool updateProgressOut(const Progress & value) { progress_out.incrementPiecewiseAtomically(value); - return !is_cancelled.load(std::memory_order_relaxed); + return !is_killed.load(std::memory_order_relaxed); } @@ -157,7 +157,7 @@ public: res.query = query; res.client_info = client_info; res.elapsed_seconds = watch.elapsedSeconds(); - res.is_cancelled = is_cancelled.load(std::memory_order_relaxed); + res.is_cancelled = is_killed.load(std::memory_order_relaxed); res.read_rows = progress_in.rows; res.read_bytes = progress_in.bytes; res.total_rows = progress_in.total_rows; diff --git a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh index 37799069779..6778bbce149 100755 --- a/dbms/tests/queries/0_stateless/00600_replace_running_query.sh +++ b/dbms/tests/queries/0_stateless/00600_replace_running_query.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -o pipefail -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(100000000) GROUP BY k)' & +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT sum(ignore(*)) FROM (SELECT number % 1000 AS k, groupArray(number) FROM numbers(100000000) GROUP BY k)' 2>&1 > /dev/null & sleep 0.1 # First query (usually) should be received by the server after this sleep. -$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 1 WHERE 0' +$CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL?query_id=hello&replace_running_query=1" -d 'SELECT 0' wait From 0bf673112154abc589700a456863ed0da8d03737 Mon Sep 17 00:00:00 2001 From: proller Date: Sat, 7 Apr 2018 14:01:00 +0300 Subject: [PATCH 022/192] Docker fixes (query compiler, compatible? 
package rename) --- docker/server/Dockerfile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 20882fbeee3..1ee459693c0 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,14 +1,15 @@ -FROM ubuntu:16.04 +FROM ubuntu:17.10 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG version=\* RUN apt-get update && \ - apt-get install -y apt-transport-https && \ + apt-get install -y apt-transport-https dirmngr && \ mkdir -p /etc/apt/sources.list.d && \ + apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 && \ echo $repository | tee /etc/apt/sources.list.d/clickhouse.list && \ apt-get update && \ - apt-get install --allow-unauthenticated -y clickhouse-server=$version && \ + apt-get install --allow-unauthenticated -y "clickhouse-server|clickhouse-server-common=$version" libgcc-7-dev && \ rm -rf /var/lib/apt/lists/* /var/cache/debconf && \ apt-get clean From dcff1bb79d09235cc5766622846021f62b02240f Mon Sep 17 00:00:00 2001 From: morty Date: Fri, 25 May 2018 19:52:50 +0300 Subject: [PATCH 023/192] First implementation --- dbms/CMakeLists.txt | 2 +- dbms/src/Client/Commands.h | 17 +++++ dbms/src/Client/Completion.cpp | 123 +++++++++++++++++++++++++++++++++ dbms/src/Client/Completion.h | 37 ++++++++++ dbms/src/Server/Client.cpp | 54 ++++++++++++++- 5 files changed, 229 insertions(+), 4 deletions(-) create mode 100644 dbms/src/Client/Commands.h create mode 100644 dbms/src/Client/Completion.cpp create mode 100644 dbms/src/Client/Completion.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 813c63109fb..55c7a0f85c6 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -32,7 +32,7 @@ find_package (Threads) add_subdirectory (src) set(dbms_headers) -set(dbms_sources) +set(dbms_sources src/Client/Completion.h src/Client/Completion.cpp src/Client/Commands.h) include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) diff --git a/dbms/src/Client/Commands.h b/dbms/src/Client/Commands.h new file mode 100644 index 00000000000..e7da1fa3ac4 --- /dev/null +++ b/dbms/src/Client/Commands.h @@ -0,0 +1,17 @@ +#ifndef CLICKHOUSE_COMMANDS_H +#define CLICKHOUSE_COMMANDS_H + +typedef struct { + char *name; /* User printable name of the function. 
*/ +} COMMAND; + +COMMAND commands[] = { + {(char *)"SELECT"}, + {(char *)"SELECD"}, + {(char *)"SELECD.sss"}, + {(char *)"INSERT"}, + {(char *)"DELETE"}, + {(char *)nullptr}, +}; + +#endif //CLICKHOUSE_COMMANDS_H diff --git a/dbms/src/Client/Completion.cpp b/dbms/src/Client/Completion.cpp new file mode 100644 index 00000000000..2e73abd211f --- /dev/null +++ b/dbms/src/Client/Completion.cpp @@ -0,0 +1,123 @@ +#include +#include +#include "Completion.h" + +namespace Completion +{ + static uint hashpjw(const char *arKey, uint nKeyLength) + { + uint h = 0; + uint g; + uint i; + for (i = 0; i < nKeyLength; i++) { + h = (h << 4) + arKey[i]; + if ((g = (h & 0xF0000000))) { + h = h ^ (g >> 24); + h = h ^ g; + } + } + return h; + } + + int init_hash_table(HashTable *ht, size_t size) + { + ht->hashFunction = hashpjw; + ht->tableSize = size; + ht->buckets = (Bucket **)calloc(size, sizeof(Bucket)); + if (!ht->buckets) { + ht->initialized = false; + return FAILURE; + } + ht->initialized = true; + return SUCCESS; + } + + void hash_add_word(HashTable *ht, char *word) + { + uint i; + char *pos = word; + for (i = 1; *pos; i++, pos++) { + hash_insert_key(ht, word, i, word); + }; + } + + int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word) + { + uint hash; + size_t bucketIndex; + Bucket *bucket; + + if (keyLength <= 0) { + return FAILURE; + } + hash = ht->hashFunction(key, keyLength); + bucketIndex = hash % ht->tableSize; + bucket = ht->buckets[bucketIndex]; + while (bucket) { + if ( (bucket->hash == hash) && (bucket->keyLength == keyLength)) { + if (!memcmp(bucket->key, key, keyLength)) { + auto *entry = (HashEntry *) calloc(1, sizeof(HashEntry)); + if (entry == nullptr) { + return FAILURE; + } + entry->text = word; + entry->next = bucket->entry; + bucket->entry = entry; + + return SUCCESS; + } + } + bucket = bucket->next; + } + bucket = (Bucket *) calloc(1, sizeof(Bucket)); + if (bucket == nullptr) { + return FAILURE; + } + bucket->key = key; + bucket->keyLength = keyLength; + bucket->hash = hash; + + bucket->entry = (HashEntry *) calloc(1, sizeof(HashEntry)); + if (bucket->entry == nullptr) { + return FAILURE; + } + + bucket->entry->text = word; + bucket->entry->next = nullptr; + + bucket->next = ht->buckets[bucketIndex]; + + ht->buckets[bucketIndex] = bucket; + + return SUCCESS; + } + + Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length) + { + Bucket *bucket; + uint hash; + size_t bucketIndex; + hash = ht->hashFunction(word, length); + bucketIndex = hash % ht->tableSize; + bucket = ht->buckets[bucketIndex]; + + while (bucket) { + if ( + (bucket->hash == hash) + && (bucket->keyLength == length) + && (!memcmp(bucket->key, word, length)) + ) { + *res_length = length; + return bucket; + } + bucket = bucket->next; + } + + *res_length = 0; + + return (Bucket *) nullptr; + } +} + + + diff --git a/dbms/src/Client/Completion.h b/dbms/src/Client/Completion.h new file mode 100644 index 00000000000..2239cf3d97c --- /dev/null +++ b/dbms/src/Client/Completion.h @@ -0,0 +1,37 @@ +#ifndef CLICKHOUSE_COMPLETION_H +#define CLICKHOUSE_COMPLETION_H + +#define SUCCESS 0 +#define FAILURE 1 + +#include + +namespace Completion +{ + struct HashEntry { + char *text; + struct HashEntry *next; + }; + + struct Bucket { + uint hash; + char *key; + uint keyLength; + HashEntry *entry; + struct Bucket *next; + }; + + struct HashTable { + bool initialized; + size_t tableSize; + uint (*hashFunction)(const char *key, uint keyLength); + Bucket **buckets; + }; + + int 
init_hash_table(HashTable *ht, size_t size); + void hash_add_word(HashTable *ht, char *word); + int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word); + Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length); +} + +#endif //CLICKHOUSE_COMPLETION_H diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index e3e4705c50d..8fa3e081aea 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -50,11 +50,16 @@ #include #include #include +#include +#include #include "InterruptListener.h" #include #include #include +static Completion::HashTable ht; +char ** commands_completion(const char *, int, int); +char * commands_generator(const char *, int); /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -68,7 +73,6 @@ #define DISABLE_LINE_WRAPPING "\033[?7l" #define ENABLE_LINE_WRAPPING "\033[?7h" - namespace DB { @@ -336,6 +340,17 @@ private: || (now.month() == 1 && now.day() <= 5); } + void init_suggestions(Completion::HashTable *ht) + { + Completion::init_hash_table(ht, 128); + COMMAND *cmd = commands; + while (cmd->name) { + Completion::hash_add_word(ht, cmd->name); + cmd++; + } + rl_attempted_completion_function = commands_completion; + } + int mainImpl() { @@ -446,9 +461,10 @@ private: throw Exception("query_id could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); + init_suggestions(&ht); /// Turn tab completion off. - rl_bind_key('\t', rl_insert); +// rl_bind_key('\t', rl_insert); /// Load command history if present. if (config().has("history_file")) @@ -1533,7 +1549,6 @@ public: } - int mainEntryClickHouseClient(int argc, char ** argv) { DB::Client client; @@ -1550,3 +1565,36 @@ int mainEntryClickHouseClient(int argc, char ** argv) return client.run(); } + +char ** commands_completion(const char *text, int start, int end) +{ + rl_attempted_completion_over = start + end + 1; + return rl_completion_matches(text, commands_generator); +} + +char * commands_generator(const char * text, int state) +{ + static int text_length; + static Completion::Bucket *bucket; + static Completion::HashEntry *entry; + char * found; + + if (!state) text_length = (uint) strlen(text); + + if (text_length > 0) { + if (!state) { + uint length; + + bucket = Completion::hash_find_all_matches(&ht, text, (uint)strlen(text), &length); + if (!bucket) return (char *) nullptr; + entry = bucket->entry; + } + if (entry) { + found = strdup(entry->text); + entry = entry->next; + return found; + } + } + + return (char *) nullptr; +} From 12a68a9e235f6f3ed8547d9da4bd5eb4e51953df Mon Sep 17 00:00:00 2001 From: morty Date: Tue, 29 May 2018 20:04:53 +0300 Subject: [PATCH 024/192] Added all clickhouse query keywords --- dbms/CMakeLists.txt | 2 +- dbms/src/Client/Commands.h | 17 -- dbms/src/Client/QueryParts.h | 522 +++++++++++++++++++++++++++++++++++ dbms/src/Server/Client.cpp | 10 +- 4 files changed, 528 insertions(+), 23 deletions(-) delete mode 100644 dbms/src/Client/Commands.h create mode 100644 dbms/src/Client/QueryParts.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 55c7a0f85c6..dd3217123a6 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -32,7 +32,7 @@ find_package (Threads) add_subdirectory (src) set(dbms_headers) -set(dbms_sources src/Client/Completion.h src/Client/Completion.cpp src/Client/Commands.h) +set(dbms_sources src/Client/Completion.h 
src/Client/Completion.cpp src/Client/QueryParts.h) include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) diff --git a/dbms/src/Client/Commands.h b/dbms/src/Client/Commands.h deleted file mode 100644 index e7da1fa3ac4..00000000000 --- a/dbms/src/Client/Commands.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef CLICKHOUSE_COMMANDS_H -#define CLICKHOUSE_COMMANDS_H - -typedef struct { - char *name; /* User printable name of the function. */ -} COMMAND; - -COMMAND commands[] = { - {(char *)"SELECT"}, - {(char *)"SELECD"}, - {(char *)"SELECD.sss"}, - {(char *)"INSERT"}, - {(char *)"DELETE"}, - {(char *)nullptr}, -}; - -#endif //CLICKHOUSE_COMMANDS_H diff --git a/dbms/src/Client/QueryParts.h b/dbms/src/Client/QueryParts.h new file mode 100644 index 00000000000..d42f8ab254e --- /dev/null +++ b/dbms/src/Client/QueryParts.h @@ -0,0 +1,522 @@ +#ifndef CLICKHOUSE_QUERY_PARTS_H +#define CLICKHOUSE_QUERY_PARTS_H + +typedef struct { + char *name; /* User printable name of the function. */ +} QUERYPART; + +QUERYPART queryParts[] = { + // CREATE DATABASE, TABLE, VIEW + {(char *)"CREATE"}, + {(char *)"DATABASE"}, + {(char *)"IF"}, + {(char *)"NOT"}, + {(char *)"EXISTS"}, + {(char *)"TEMPORARY"}, + {(char *)"TABLE"}, + {(char *)"ON"}, + {(char *)"CLUSTER"}, + {(char *)"DEFAULT"}, + {(char *)"MATERIALIZED"}, + {(char *)"ALIAS"}, + {(char *)"ENGINE"}, + {(char *)"AS"}, + {(char *)"VIEW"}, + {(char *)"POPULATE"}, + //ATTACH/DETACH + {(char *)"ATTACH"}, + {(char *)"DETACH"}, + //DROP + {(char *)"DROP"}, + //RENAME + {(char *)"RENAME"}, + {(char *)"TO"}, + //ALTER + {(char *)"ALTER"}, + {(char *)"ADD"}, + {(char *)"MODIFY"}, + {(char *)"COLUMN"}, + {(char *)"AFTER"}, + //PARTITIONS + {(char *)"PARTITION"}, + {(char *)"PART"}, + {(char *)"FREEZE"}, + {(char *)"FETCH"}, + {(char *)"FROM"}, + //SHOW DATABASES,TABLES,PROCESSLIST + {(char *)"SHOW"}, + {(char *)"INTO"}, + {(char *)"OUTFILE"}, + {(char *)"FORMAT"}, + {(char *)"TABLES"}, + {(char *)"LIKE"}, + {(char *)"PROCESSLIST"}, + //DESCRIBE + {(char *)"DESCRIBE"}, + {(char *)"DESC"}, + //USE + {(char *)"USE"}, + //SET + {(char *)"SET"}, + //OPTIMIZE + {(char *)"OPTIMIZE"}, + {(char *)"FINAL"}, + //INSERT + {(char *)"INSERT"}, + {(char *)"VALUES"}, + //SELECT + {(char *)"SELECT"}, + {(char *)"DISTINCT"}, + {(char *)"SAMPLE"}, + {(char *)"ARRAY"}, + {(char *)"JOIN"}, + {(char *)"GLOBAL"}, + {(char *)"ANY"}, + {(char *)"ALL"}, + {(char *)"INNER"}, + {(char *)"LEFT"}, + {(char *)"USING"}, + {(char *)"PREWHERE"}, + {(char *)"WHERE"}, + {(char *)"GROUP"}, + {(char *)"BY"}, + {(char *)"WITH"}, + {(char *)"TOTALS"}, + {(char *)"HAVING"}, + {(char *)"ORDER"}, + {(char *)"LIMIT"}, + {(char *)"UNION"}, + {(char *)"AND"}, + {(char *)"OR"}, + {(char *)"ASC"}, + //IN + {(char *)"IN"}, + //KILL QUERY + {(char *)"KILL"}, + {(char *)"QUERY"}, + {(char *)"SYNC"}, + {(char *)"ASYNC"}, + {(char *)"TEST"}, + //Table engines + {(char *)"TinyLog"}, + {(char *)"Log"}, + {(char *)"Memory"}, + {(char *)"MergeTree"}, + {(char *)"ReplacingMergeTree"}, + {(char *)"SummingMergeTree"}, + {(char *)"AggregateFunction"}, + {(char *)"CollapsingMergeTree"}, + {(char *)"GraphiteMergeTree"}, + {(char *)"ReplicatedMergeTree"}, + {(char *)"ReplicatedSummingMergeTree"}, + {(char *)"ReplicatedReplacingMergeTree"}, + {(char *)"ReplicatedAggregatingMergeTree"}, + {(char *)"ReplicatedCollapsingMergeTree"}, + {(char *)"ReplicatedGraphiteMergeTree"}, + {(char *)"Distributed"}, + {(char *)"Dictionary"}, + {(char *)"Merge"}, + {(char *)"Buffer"}, + {(char *)"File"}, + {(char *)"Null"}, + {(char *)"Set"}, + 
{(char *)"Join"}, + {(char *)"View"}, + {(char *)"MaterializedView"}, + {(char *)"Kafka"}, + {(char *)"MySQL"}, + //FORMATS + {(char *)"TabSeparated"}, + {(char *)"TabSeparatedRaw"}, + {(char *)"TabSeparatedWithNames"}, + {(char *)"TSVWithNames"}, + {(char *)"TabSeparatedWithNamesAndTypes"}, + {(char *)"TSVWithNamesAndTypes"}, + {(char *)"CSV"}, + {(char *)"CSVWithNames"}, + {(char *)"Values"}, + {(char *)"Vertical"}, + {(char *)"VerticalRaw"}, + {(char *)"JSON"}, + {(char *)"JSONCompact"}, + {(char *)"JSONEachRow"}, + {(char *)"TSKV"}, + {(char *)"Pretty"}, + {(char *)"PrettyCompact"}, + {(char *)"PrettyCompactMonoBlock"}, + {(char *)"PrettyNoEscapes"}, + {(char *)"PrettySpace"}, + {(char *)"RowBinary"}, + {(char *)"Native"}, + {(char *)"XML"}, + {(char *)"CapnProto"}, + //TYPES + {(char *)"Int8"}, + {(char *)"Int16"}, + {(char *)"Int32"}, + {(char *)"Int64"}, + {(char *)"UInt8"}, + {(char *)"UInt16"}, + {(char *)"UInt32"}, + {(char *)"UInt64"}, + {(char *)"Float32"}, + {(char *)"Float64"}, + {(char *)"Boolean"}, + {(char *)"String"}, + {(char *)"FixedString"}, + {(char *)"Date"}, + {(char *)"DateTime"}, + {(char *)"Enum8"}, + {(char *)"Enum16"}, + {(char *)"Array"}, + {(char *)"Tuple"}, + {(char *)"Nested"}, + {(char *)"Expression"}, + {(char *)"Set"}, + //FUNCTIONS + {(char *)"plus"}, + {(char *)"minus"}, + {(char *)"multiply"}, + {(char *)"divide"}, + {(char *)"intDiv"}, + {(char *)"intDivOrZero"}, + {(char *)"modulo"}, + {(char *)"negate"}, + {(char *)"abs"}, + {(char *)"gcd"}, + {(char *)"lcm"}, + {(char *)"equals"}, + {(char *)"notEquals"}, + {(char *)"less"}, + {(char *)"greater"}, + {(char *)"lessOrEquals"}, + {(char *)"greaterOrEquals"}, + {(char *)"and"}, + {(char *)"or"}, + {(char *)"not"}, + {(char *)"xor"}, + {(char *)"toUInt8"}, + {(char *)"toUInt16"}, + {(char *)"toUInt32"}, + {(char *)"toUInt64"}, + {(char *)"toInt8"}, + {(char *)"toInt16"}, + {(char *)"toInt32"}, + {(char *)"toInt64"}, + {(char *)"toFloat32"}, + {(char *)"toFloat64"}, + {(char *)"toUInt8OrZero"}, + {(char *)"toUInt16OrZero"}, + {(char *)"toUInt32OrZero"}, + {(char *)"toUInt64OrZero"}, + {(char *)"toInt8OrZero"}, + {(char *)"toInt16OrZero"}, + {(char *)"toInt32OrZero"}, + {(char *)"toInt64OrZero"}, + {(char *)"toFloat32OrZero"}, + {(char *)"toFloat64OrZero"}, + {(char *)"toDate"}, + {(char *)"toDateTime"}, + {(char *)"toString"}, + {(char *)"toFixedString"}, + {(char *)"toStringCutToZero"}, + {(char *)"reinterpretAsUInt8"}, + {(char *)"reinterpretAsUInt16"}, + {(char *)"reinterpretAsUInt32"}, + {(char *)"reinterpretAsUInt64"}, + {(char *)"reinterpretAsInt8"}, + {(char *)"reinterpretAsInt16"}, + {(char *)"reinterpretAsInt32"}, + {(char *)"reinterpretAsInt64"}, + {(char *)"reinterpretAsFloat32"}, + {(char *)"reinterpretAsFloat64"}, + {(char *)"reinterpretAsDate"}, + {(char *)"reinterpretAsDateTime"}, + {(char *)"reinterpretAsString"}, + {(char *)"CAST"}, + {(char *)"toYear"}, + {(char *)"toMonth"}, + {(char *)"toDayOfMonth"}, + {(char *)"toDayOfWeek"}, + {(char *)"toHour"}, + {(char *)"toMinute"}, + {(char *)"toSecond"}, + {(char *)"toMonday"}, + {(char *)"toStartOfMonth"}, + {(char *)"toStartOfQuarter"}, + {(char *)"toStartOfYear"}, + {(char *)"toStartOfMinute"}, + {(char *)"toStartOfFiveMinute"}, + {(char *)"toStartOfFifteenMinutes"}, + {(char *)"toStartOfHour"}, + {(char *)"toStartOfDay"}, + {(char *)"toTime"}, + {(char *)"toRelativeYearNum"}, + {(char *)"toRelativeMonthNum"}, + {(char *)"toRelativeWeekNum"}, + {(char *)"toRelativeDayNum"}, + {(char *)"toRelativeHourNum"}, + {(char 
*)"toRelativeMinuteNum"}, + {(char *)"toRelativeSecondNum"}, + {(char *)"now"}, + {(char *)"today"}, + {(char *)"yesterday"}, + {(char *)"timeSlot"}, + {(char *)"empty"}, + {(char *)"notEmpty"}, + {(char *)"length"}, + {(char *)"lengthUTF8"}, + {(char *)"lower"}, + {(char *)"upper"}, + {(char *)"lowerUTF8"}, + {(char *)"upperUTF8"}, + {(char *)"reverse"}, + {(char *)"reverseUTF8"}, + {(char *)"concat"}, + {(char *)"substringUTF8"}, + {(char *)"appendTrailingCharIfAbsent"}, + {(char *)"convertCharset"}, + {(char *)"position"}, + {(char *)"positionUTF8"}, + {(char *)"match"}, + {(char *)"extract"}, + {(char *)"extractAll"}, + {(char *)"like"}, + {(char *)"notLike"}, + {(char *)"replaceOne"}, + {(char *)"replaceAll"}, + {(char *)"replaceRegexpOne"}, + {(char *)"replaceRegexpAll"}, + {(char *)"if"}, + {(char *)"e"}, + {(char *)"pi"}, + {(char *)"exp"}, + {(char *)"log"}, + {(char *)"exp2"}, + {(char *)"log2"}, + {(char *)"exp10"}, + {(char *)"log10"}, + {(char *)"sqrt"}, + {(char *)"cbrt"}, + {(char *)"erf"}, + {(char *)"erfc"}, + {(char *)"lgamma"}, + {(char *)"tgamma"}, + {(char *)"sin"}, + {(char *)"cos"}, + {(char *)"tan"}, + {(char *)"asin"}, + {(char *)"acos"}, + {(char *)"atan"}, + {(char *)"pow"}, + {(char *)"floor"}, + {(char *)"ceil"}, + {(char *)"round"}, + {(char *)"roundToExp2"}, + {(char *)"roundDuration"}, + {(char *)"roundAge"}, + {(char *)"emptyArrayUInt8"}, + {(char *)"emptyArrayUInt16"}, + {(char *)"emptyArrayUInt32"}, + {(char *)"emptyArrayUInt64"}, + {(char *)"emptyArrayInt8"}, + {(char *)"emptyArrayInt16"}, + {(char *)"emptyArrayInt32"}, + {(char *)"emptyArrayInt64"}, + {(char *)"emptyArrayFloat32"}, + {(char *)"emptyArrayFloat64"}, + {(char *)"emptyArrayDate"}, + {(char *)"emptyArrayDateTime"}, + {(char *)"emptyArrayString"}, + {(char *)"emptyArrayToSingle"}, + {(char *)"range"}, + {(char *)"array"}, + {(char *)"arrayConcat"}, + {(char *)"arrayElement"}, + {(char *)"has"}, + {(char *)"indexOf"}, + {(char *)"countEqual"}, + {(char *)"arrayEnumerate"}, + {(char *)"arrayEnumerateUniq"}, + {(char *)"arrayPopBack"}, + {(char *)"arrayPopFront"}, + {(char *)"arrayPushBack"}, + {(char *)"arrayPushFront"}, + {(char *)"arraySlice"}, + {(char *)"arrayUniq"}, + {(char *)"arrayJoin"}, + {(char *)"splitByChar"}, + {(char *)"splitByString"}, + {(char *)"arrayStringConcat"}, + {(char *)"alphaTokens"}, + {(char *)"bitAnd"}, + {(char *)"bitOr"}, + {(char *)"bitXor"}, + {(char *)"bitNot"}, + {(char *)"bitShiftLeft"}, + {(char *)"bitShiftRight"}, + {(char *)"halfMD5"}, + {(char *)"MD5"}, + {(char *)"sipHash64"}, + {(char *)"sipHash128"}, + {(char *)"cityHash64"}, + {(char *)"intHash32"}, + {(char *)"intHash64"}, + {(char *)"SHA1"}, + {(char *)"SHA224"}, + {(char *)"SHA256"}, + {(char *)"URLHash"}, + {(char *)"rand"}, + {(char *)"rand64"}, + {(char *)"hex"}, + {(char *)"unhex"}, + {(char *)"UUIDStringToNum"}, + {(char *)"UUIDNumToString"}, + {(char *)"bitmaskToList"}, + {(char *)"bitmaskToArray"}, + {(char *)"protocol"}, + {(char *)"domain"}, + {(char *)"domainWithoutWWW"}, + {(char *)"topLevelDomain"}, + {(char *)"firstSignificantSubdomain"}, + {(char *)"cutToFirstSignificantSubdomain"}, + {(char *)"path"}, + {(char *)"pathFull"}, + {(char *)"queryString"}, + {(char *)"fragment"}, + {(char *)"queryStringAndFragment"}, + {(char *)"extractURLParameter"}, + {(char *)"extractURLParameters"}, + {(char *)"extractURLParameterNames"}, + {(char *)"URLHierarchy"}, + {(char *)"URLPathHierarchy"}, + {(char *)"decodeURLComponent"}, + {(char *)"cutWWW"}, + {(char *)"cutQueryString"}, + {(char 
*)"cutFragment"}, + {(char *)"cutQueryStringAndFragment"}, + {(char *)"cutURLParameter"}, + {(char *)"IPv4NumToString"}, + {(char *)"IPv4StringToNum"}, + {(char *)"IPv6NumToString"}, + {(char *)"IPv6StringToNum"}, + {(char *)"visitParamHas"}, + {(char *)"visitParamExtractUInt"}, + {(char *)"visitParamExtractInt"}, + {(char *)"visitParamExtractFloat"}, + {(char *)"visitParamExtractBool"}, + {(char *)"visitParamExtractRaw"}, + {(char *)"visitParamExtractString"}, + {(char *)"arrayMap"}, + {(char *)"arrayFilter"}, + {(char *)"arrayCount"}, + {(char *)"arrayExists"}, + {(char *)"arrayAll"}, + {(char *)"arraySum"}, + {(char *)"arrayFirst"}, + {(char *)"arrayFirstIndex"}, + {(char *)"arrayCumSum"}, + {(char *)"arraySort"}, + {(char *)"arrayReverseSort"}, + {(char *)"hostName"}, + {(char *)"visibleWidth"}, + {(char *)"toTypeName"}, + {(char *)"blockSize"}, + {(char *)"materialize"}, + {(char *)"ignore"}, + {(char *)"sleep"}, + {(char *)"currentDatabase"}, + {(char *)"isFinite"}, + {(char *)"isInfinite"}, + {(char *)"isNaN"}, + {(char *)"hasColumnInTable"}, + {(char *)"bar"}, + {(char *)"transform"}, + {(char *)"formatReadableSize"}, + {(char *)"least"}, + {(char *)"greatest"}, + {(char *)"uptime"}, + {(char *)"version"}, + {(char *)"rowNumberInAllBlocks"}, + {(char *)"runningDifference"}, + {(char *)"MACNumToString"}, + {(char *)"MACStringToNum"}, + {(char *)"MACStringToOUI"}, + {(char *)"dictGetUInt8"}, + {(char *)"dictGetUInt16"}, + {(char *)"dictGetUInt32"}, + {(char *)"dictGetUInt64"}, + {(char *)"dictGetInt8"}, + {(char *)"dictGetInt16"}, + {(char *)"dictGetInt32"}, + {(char *)"dictGetInt64"}, + {(char *)"dictGetFloat32"}, + {(char *)"dictGetFloat64"}, + {(char *)"dictGetDate"}, + {(char *)"dictGetDateTime"}, + {(char *)"dictGetUUID"}, + {(char *)"dictGetString"}, + {(char *)"dictGetTOrDefault"}, + {(char *)"dictIsIn"}, + {(char *)"dictGetHierarchy"}, + {(char *)"dictHas"}, + {(char *)"regionToCity"}, + {(char *)"regionToArea"}, + {(char *)"regionToDistrict"}, + {(char *)"regionToCountry"}, + {(char *)"regionToContinent"}, + {(char *)"regionToPopulation"}, + {(char *)"regionIn"}, + {(char *)"regionHierarchy"}, + {(char *)"regionToName"}, + {(char *)"globalIn"}, + {(char *)"in"}, + {(char *)"notIn"}, + {(char *)"globalNotIn"}, + {(char *)"tuple"}, + {(char *)"tupleElement"}, + {(char *)"count"}, + {(char *)"any"}, + {(char *)"anyHeavy"}, + {(char *)"anyLast"}, + {(char *)"min"}, + {(char *)"max"}, + {(char *)"argMin"}, + {(char *)"argMax"}, + {(char *)"sum"}, + {(char *)"sumWithOverflow"}, + {(char *)"sumMap"}, + {(char *)"avg"}, + {(char *)"uniq"}, + {(char *)"uniqCombined"}, + {(char *)"uniqHLL12"}, + {(char *)"uniqExact"}, + {(char *)"groupArray"}, + {(char *)"groupArrayInsertAt"}, + {(char *)"groupUniqArray"}, + {(char *)"quantile"}, + {(char *)"quantileDeterministic"}, + {(char *)"quantileTiming"}, + {(char *)"quantileTimingWeighted"}, + {(char *)"quantileExact"}, + {(char *)"quantileExactWeighted"}, + {(char *)"quantileTDigest"}, + {(char *)"median"}, + {(char *)"quantiles"}, + {(char *)"varSamp"}, + {(char *)"varPop"}, + {(char *)"stddevSamp"}, + {(char *)"stddevPop"}, + {(char *)"topK"}, + {(char *)"covarSamp"}, + {(char *)"covarPop"}, + {(char *)"corr"}, + {(char *)"sequenceMatch"}, + {(char *)"sequenceCount"}, + {(char *)"windowFunnel"}, + {(char *)"uniqUpTo"}, + //END OF LIST + {(char *)nullptr}, +}; + +#endif //CLICKHOUSE_QUERY_PARTS_H diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 8fa3e081aea..3c17550d95f 100644 --- a/dbms/src/Server/Client.cpp +++ 
b/dbms/src/Server/Client.cpp
@@ -51,7 +51,7 @@
 #include
 #include
 #include
-#include
+#include
 #include "InterruptListener.h"
 #include
 #include
@@ -343,10 +343,10 @@ private:
     void init_suggestions(Completion::HashTable *ht)
     {
         Completion::init_hash_table(ht, 128);
-        COMMAND *cmd = commands;
-        while (cmd->name) {
-            Completion::hash_add_word(ht, cmd->name);
-            cmd++;
+        QUERYPART *qP = queryParts;
+        while (qP->name) {
+            Completion::hash_add_word(ht, qP->name);
+            qP++;
         }
         rl_attempted_completion_function = commands_completion;
     }

From be0f0ecef94082cec3aa732d4c3753bb717f3505 Mon Sep 17 00:00:00 2001
From: morty
Date: Wed, 30 May 2018 17:45:43 +0300
Subject: [PATCH 025/192] Free hash after usage and don't use it at all if no
 readline present

---
 dbms/src/Client/Completion.cpp | 21 +++++++++++++--------
 dbms/src/Client/Completion.h   |  6 ++++--
 dbms/src/Server/Client.cpp     | 17 +++++++++++------
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/dbms/src/Client/Completion.cpp b/dbms/src/Client/Completion.cpp
index 2e73abd211f..29b0d784d99 100644
--- a/dbms/src/Client/Completion.cpp
+++ b/dbms/src/Client/Completion.cpp
@@ -26,10 +26,10 @@ namespace Completion
         ht->buckets = (Bucket **)calloc(size, sizeof(Bucket));
         if (!ht->buckets) {
             ht->initialized = false;
-            return FAILURE;
+            return HASH_FAILURE;
         }
         ht->initialized = true;
-        return SUCCESS;
+        return HASH_SUCCESS;
     }

     void hash_add_word(HashTable *ht, char *word)
@@ -48,7 +48,7 @@ namespace Completion
         Bucket *bucket;

         if (keyLength <= 0) {
-            return FAILURE;
+            return HASH_FAILURE;
         }
         hash = ht->hashFunction(key, keyLength);
         bucketIndex = hash % ht->tableSize;
@@ -58,20 +58,20 @@ namespace Completion
                 if (!memcmp(bucket->key, key, keyLength)) {
                     auto *entry = (HashEntry *) calloc(1, sizeof(HashEntry));
                     if (entry == nullptr) {
-                        return FAILURE;
+                        return HASH_FAILURE;
                     }
                     entry->text = word;
                     entry->next = bucket->entry;
                     bucket->entry = entry;

-                    return SUCCESS;
+                    return HASH_SUCCESS;
                 }
             }
             bucket = bucket->next;
         }
         bucket = (Bucket *) calloc(1, sizeof(Bucket));
         if (bucket == nullptr) {
-            return FAILURE;
+            return HASH_FAILURE;
         }
         bucket->key = key;
         bucket->keyLength = keyLength;
         bucket->hash = hash;
@@ -79,7 +79,7 @@ namespace Completion

         bucket->entry = (HashEntry *) calloc(1, sizeof(HashEntry));
         if (bucket->entry == nullptr) {
-            return FAILURE;
+            return HASH_FAILURE;
         }

         bucket->entry->text = word;
@@ -89,7 +89,7 @@ namespace Completion

         ht->buckets[bucketIndex] = bucket;

-        return SUCCESS;
+        return HASH_SUCCESS;
     }

     Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length)
@@ -117,6 +117,11 @@ namespace Completion

         return (Bucket *) nullptr;
     }
+
+    void hash_free(HashTable *ht)
+    {
+        free(ht->buckets);
+    }
 }

diff --git a/dbms/src/Client/Completion.h b/dbms/src/Client/Completion.h
index 2239cf3d97c..84d50514d7d 100644
--- a/dbms/src/Client/Completion.h
+++ b/dbms/src/Client/Completion.h
@@ -1,11 +1,12 @@
 #ifndef CLICKHOUSE_COMPLETION_H
 #define CLICKHOUSE_COMPLETION_H

-#define SUCCESS 0
-#define FAILURE 1
+#define HASH_SUCCESS 0
+#define HASH_FAILURE 1

 #include

+// The hash table functionality here was adapted from the mysql-server project's completion_hash.cpp.
 namespace Completion
 {
     struct HashEntry {
@@ -31,6 +32,7 @@ namespace Completion
     int init_hash_table(HashTable *ht, size_t size);
     void hash_add_word(HashTable *ht, char *word);
     int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word);
+    void hash_free(HashTable *ht);
     Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length);
 }

diff --git
a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 3c17550d95f..939d0cf2af7 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -58,8 +58,8 @@ #include static Completion::HashTable ht; -char ** commands_completion(const char *, int, int); -char * commands_generator(const char *, int); +char ** query_parts_completion(const char *, int, int); +char * query_parts_generator(const char *, int); /// http://en.wikipedia.org/wiki/ANSI_escape_code @@ -348,7 +348,7 @@ private: Completion::hash_add_word(ht, qP->name); qP++; } - rl_attempted_completion_function = commands_completion; + rl_attempted_completion_function = query_parts_completion; } @@ -461,7 +461,9 @@ private: throw Exception("query_id could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); +#if USE_READLINE init_suggestions(&ht); +#endif /// Turn tab completion off. // rl_bind_key('\t', rl_insert); @@ -487,6 +489,9 @@ private: } loop(); +#if USE_READLINE + Completion::hash_free(&ht); +#endif std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl; @@ -1566,13 +1571,13 @@ int mainEntryClickHouseClient(int argc, char ** argv) return client.run(); } -char ** commands_completion(const char *text, int start, int end) +char ** query_parts_completion(const char *text, int start, int end) { rl_attempted_completion_over = start + end + 1; - return rl_completion_matches(text, commands_generator); + return rl_completion_matches(text, query_parts_generator); } -char * commands_generator(const char * text, int state) +char * query_parts_generator(const char *text, int state) { static int text_length; static Completion::Bucket *bucket; From 3ad80a6a0fb65e42f2301201f0201708785b214c Mon Sep 17 00:00:00 2001 From: morty Date: Mon, 4 Jun 2018 17:50:16 +0300 Subject: [PATCH 026/192] Removed hashtable use ternary search tree --- dbms/src/Client/Completion.cpp | 177 ++++++++--------- dbms/src/Client/Completion.h | 56 +++--- dbms/src/Client/QueryParts.h | 344 +-------------------------------- dbms/src/Server/Client.cpp | 22 +-- 4 files changed, 115 insertions(+), 484 deletions(-) diff --git a/dbms/src/Client/Completion.cpp b/dbms/src/Client/Completion.cpp index 29b0d784d99..bf976dc1f78 100644 --- a/dbms/src/Client/Completion.cpp +++ b/dbms/src/Client/Completion.cpp @@ -1,126 +1,105 @@ -#include -#include #include "Completion.h" namespace Completion { - static uint hashpjw(const char *arKey, uint nKeyLength) + void TSTNode::add_word(char *word) { - uint h = 0; - uint g; - uint i; - for (i = 0; i < nKeyLength; i++) { - h = (h << 4) + arKey[i]; - if ((g = (h & 0xF0000000))) { - h = h ^ (g >> 24); - h = h ^ g; + insert(word, word); + } + + void TSTNode::insert(char *word, char *remainder) + { + if (!*remainder) { + return; + } + + if (!token) { + token = *remainder; + } + + if (token > *remainder) { + if (!left) { + left = (TSTNode *) calloc(1, sizeof(TSTNode)); } + return left->insert(word, remainder); } - return h; - } - int init_hash_table(HashTable *ht, size_t size) - { - ht->hashFunction = hashpjw; - ht->tableSize = size; - ht->buckets = (Bucket **)calloc(size, sizeof(Bucket)); - if (!ht->buckets) { - ht->initialized = false; - return HASH_FAILURE; - } - ht->initialized = true; - return HASH_SUCCESS; - } - - void hash_add_word(HashTable *ht, char *word) - { - uint i; - char *pos = word; - for (i = 1; *pos; i++, pos++) { - hash_insert_key(ht, word, i, 
word); - }; - } - - int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word) - { - uint hash; - size_t bucketIndex; - Bucket *bucket; - - if (keyLength <= 0) { - return HASH_FAILURE; - } - hash = ht->hashFunction(key, keyLength); - bucketIndex = hash % ht->tableSize; - bucket = ht->buckets[bucketIndex]; - while (bucket) { - if ( (bucket->hash == hash) && (bucket->keyLength == keyLength)) { - if (!memcmp(bucket->key, key, keyLength)) { - auto *entry = (HashEntry *) calloc(1, sizeof(HashEntry)); - if (entry == nullptr) { - return HASH_FAILURE; - } - entry->text = word; - entry->next = bucket->entry; - bucket->entry = entry; - - return HASH_SUCCESS; - } + if (token < *remainder) { + if (!right) { + right = (TSTNode *) calloc(1, sizeof(TSTNode)); } - bucket = bucket->next; - } - bucket = (Bucket *) calloc(1, sizeof(Bucket)); - if (bucket == nullptr) { - return HASH_FAILURE; - } - bucket->key = key; - bucket->keyLength = keyLength; - bucket->hash = hash; - - bucket->entry = (HashEntry *) calloc(1, sizeof(HashEntry)); - if (bucket->entry == nullptr) { - return HASH_FAILURE; + return right->insert(word, remainder); } - bucket->entry->text = word; - bucket->entry->next = nullptr; + auto newEntry = (Entry *) calloc(1, sizeof(Entry)); + newEntry->text = word; + newEntry->next = entry; + entry = newEntry; - bucket->next = ht->buckets[bucketIndex]; + if (!middle) { + middle = (TSTNode *) calloc(1, sizeof(TSTNode)); + } - ht->buckets[bucketIndex] = bucket; - - return HASH_SUCCESS; + return middle->insert(word, ++remainder); } - Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length) + Entry * TSTNode::find_all(const char *word) { - Bucket *bucket; - uint hash; - size_t bucketIndex; - hash = ht->hashFunction(word, length); - bucketIndex = hash % ht->tableSize; - bucket = ht->buckets[bucketIndex]; + if (!word) { + return (Entry *) nullptr; + } - while (bucket) { - if ( - (bucket->hash == hash) - && (bucket->keyLength == length) - && (!memcmp(bucket->key, word, length)) - ) { - *res_length = length; - return bucket; + return find(word, word); + } + + Entry * TSTNode::find(const char *word, const char *remainder) + { + if (token > *remainder) { + if (!left) { + return (Entry *) nullptr; } - bucket = bucket->next; + return left->find(word, remainder); } - *res_length = 0; + if (token < *remainder) { + if (!right) { + return (Entry *) nullptr; + } + return right->find(word, remainder); + } - return (Bucket *) nullptr; + if (!middle) { + return (Entry *) nullptr; + } + + if (strlen(remainder) == 1) { + return entry; + } + + return middle->find(word, ++remainder); } - void hash_free(HashTable *ht) + void TSTNode::free() { - free(ht->buckets); + if (left) { + left->free(); + std::free(left); + } + + if (right) { + right->free(); + std::free(right); + } + + if (middle) { + middle->free(); + std::free(middle); + } + + if (entry) { + entry->free(); + std::free(entry); + } } } diff --git a/dbms/src/Client/Completion.h b/dbms/src/Client/Completion.h index 84d50514d7d..d82c6506f44 100644 --- a/dbms/src/Client/Completion.h +++ b/dbms/src/Client/Completion.h @@ -1,39 +1,41 @@ #ifndef CLICKHOUSE_COMPLETION_H #define CLICKHOUSE_COMPLETION_H -#define HASH_SUCCESS 0 -#define HASH_FAILURE 1 +#include +#include +#include -#include - -//All of functionality for hash was taken from mysql-server project from completion_hash.cpp file namespace Completion { - struct HashEntry { - char *text; - struct HashEntry *next; - }; - struct Bucket { - uint hash; - char *key; - uint 
keyLength; - HashEntry *entry; - struct Bucket *next; - }; +struct Entry { + char *text; + struct Entry *next; + void free() + { + if (next) { + next->free(); + std::free(next); + } + std::free(text); + } +}; - struct HashTable { - bool initialized; - size_t tableSize; - uint (*hashFunction)(const char *key, uint keyLength); - Bucket **buckets; - }; +class TSTNode { +private: + TSTNode *left; + TSTNode *right; + TSTNode *middle; + char token; + Entry *entry; + void insert(char *word, char *remainder); + Entry * find(const char *word, const char *remainder); +public: + void add_word(char *word); + Entry * find_all(const char *word); + void free(); +}; - int init_hash_table(HashTable *ht, size_t size); - void hash_add_word(HashTable *ht, char *word); - int hash_insert_key(HashTable *ht, char *key, uint keyLength, char* word); - void hash_free(HashTable *ht); - Bucket * hash_find_all_matches(HashTable *ht, const char *word, uint length, uint *res_length); } #endif //CLICKHOUSE_COMPLETION_H diff --git a/dbms/src/Client/QueryParts.h b/dbms/src/Client/QueryParts.h index d42f8ab254e..9ad2b2451f4 100644 --- a/dbms/src/Client/QueryParts.h +++ b/dbms/src/Client/QueryParts.h @@ -2,7 +2,7 @@ #define CLICKHOUSE_QUERY_PARTS_H typedef struct { - char *name; /* User printable name of the function. */ + char *name; /* User printable name of the query part. */ } QUERYPART; QUERYPART queryParts[] = { @@ -173,348 +173,6 @@ QUERYPART queryParts[] = { {(char *)"Nested"}, {(char *)"Expression"}, {(char *)"Set"}, - //FUNCTIONS - {(char *)"plus"}, - {(char *)"minus"}, - {(char *)"multiply"}, - {(char *)"divide"}, - {(char *)"intDiv"}, - {(char *)"intDivOrZero"}, - {(char *)"modulo"}, - {(char *)"negate"}, - {(char *)"abs"}, - {(char *)"gcd"}, - {(char *)"lcm"}, - {(char *)"equals"}, - {(char *)"notEquals"}, - {(char *)"less"}, - {(char *)"greater"}, - {(char *)"lessOrEquals"}, - {(char *)"greaterOrEquals"}, - {(char *)"and"}, - {(char *)"or"}, - {(char *)"not"}, - {(char *)"xor"}, - {(char *)"toUInt8"}, - {(char *)"toUInt16"}, - {(char *)"toUInt32"}, - {(char *)"toUInt64"}, - {(char *)"toInt8"}, - {(char *)"toInt16"}, - {(char *)"toInt32"}, - {(char *)"toInt64"}, - {(char *)"toFloat32"}, - {(char *)"toFloat64"}, - {(char *)"toUInt8OrZero"}, - {(char *)"toUInt16OrZero"}, - {(char *)"toUInt32OrZero"}, - {(char *)"toUInt64OrZero"}, - {(char *)"toInt8OrZero"}, - {(char *)"toInt16OrZero"}, - {(char *)"toInt32OrZero"}, - {(char *)"toInt64OrZero"}, - {(char *)"toFloat32OrZero"}, - {(char *)"toFloat64OrZero"}, - {(char *)"toDate"}, - {(char *)"toDateTime"}, - {(char *)"toString"}, - {(char *)"toFixedString"}, - {(char *)"toStringCutToZero"}, - {(char *)"reinterpretAsUInt8"}, - {(char *)"reinterpretAsUInt16"}, - {(char *)"reinterpretAsUInt32"}, - {(char *)"reinterpretAsUInt64"}, - {(char *)"reinterpretAsInt8"}, - {(char *)"reinterpretAsInt16"}, - {(char *)"reinterpretAsInt32"}, - {(char *)"reinterpretAsInt64"}, - {(char *)"reinterpretAsFloat32"}, - {(char *)"reinterpretAsFloat64"}, - {(char *)"reinterpretAsDate"}, - {(char *)"reinterpretAsDateTime"}, - {(char *)"reinterpretAsString"}, - {(char *)"CAST"}, - {(char *)"toYear"}, - {(char *)"toMonth"}, - {(char *)"toDayOfMonth"}, - {(char *)"toDayOfWeek"}, - {(char *)"toHour"}, - {(char *)"toMinute"}, - {(char *)"toSecond"}, - {(char *)"toMonday"}, - {(char *)"toStartOfMonth"}, - {(char *)"toStartOfQuarter"}, - {(char *)"toStartOfYear"}, - {(char *)"toStartOfMinute"}, - {(char *)"toStartOfFiveMinute"}, - {(char *)"toStartOfFifteenMinutes"}, - {(char *)"toStartOfHour"}, - 
{(char *)"toStartOfDay"}, - {(char *)"toTime"}, - {(char *)"toRelativeYearNum"}, - {(char *)"toRelativeMonthNum"}, - {(char *)"toRelativeWeekNum"}, - {(char *)"toRelativeDayNum"}, - {(char *)"toRelativeHourNum"}, - {(char *)"toRelativeMinuteNum"}, - {(char *)"toRelativeSecondNum"}, - {(char *)"now"}, - {(char *)"today"}, - {(char *)"yesterday"}, - {(char *)"timeSlot"}, - {(char *)"empty"}, - {(char *)"notEmpty"}, - {(char *)"length"}, - {(char *)"lengthUTF8"}, - {(char *)"lower"}, - {(char *)"upper"}, - {(char *)"lowerUTF8"}, - {(char *)"upperUTF8"}, - {(char *)"reverse"}, - {(char *)"reverseUTF8"}, - {(char *)"concat"}, - {(char *)"substringUTF8"}, - {(char *)"appendTrailingCharIfAbsent"}, - {(char *)"convertCharset"}, - {(char *)"position"}, - {(char *)"positionUTF8"}, - {(char *)"match"}, - {(char *)"extract"}, - {(char *)"extractAll"}, - {(char *)"like"}, - {(char *)"notLike"}, - {(char *)"replaceOne"}, - {(char *)"replaceAll"}, - {(char *)"replaceRegexpOne"}, - {(char *)"replaceRegexpAll"}, - {(char *)"if"}, - {(char *)"e"}, - {(char *)"pi"}, - {(char *)"exp"}, - {(char *)"log"}, - {(char *)"exp2"}, - {(char *)"log2"}, - {(char *)"exp10"}, - {(char *)"log10"}, - {(char *)"sqrt"}, - {(char *)"cbrt"}, - {(char *)"erf"}, - {(char *)"erfc"}, - {(char *)"lgamma"}, - {(char *)"tgamma"}, - {(char *)"sin"}, - {(char *)"cos"}, - {(char *)"tan"}, - {(char *)"asin"}, - {(char *)"acos"}, - {(char *)"atan"}, - {(char *)"pow"}, - {(char *)"floor"}, - {(char *)"ceil"}, - {(char *)"round"}, - {(char *)"roundToExp2"}, - {(char *)"roundDuration"}, - {(char *)"roundAge"}, - {(char *)"emptyArrayUInt8"}, - {(char *)"emptyArrayUInt16"}, - {(char *)"emptyArrayUInt32"}, - {(char *)"emptyArrayUInt64"}, - {(char *)"emptyArrayInt8"}, - {(char *)"emptyArrayInt16"}, - {(char *)"emptyArrayInt32"}, - {(char *)"emptyArrayInt64"}, - {(char *)"emptyArrayFloat32"}, - {(char *)"emptyArrayFloat64"}, - {(char *)"emptyArrayDate"}, - {(char *)"emptyArrayDateTime"}, - {(char *)"emptyArrayString"}, - {(char *)"emptyArrayToSingle"}, - {(char *)"range"}, - {(char *)"array"}, - {(char *)"arrayConcat"}, - {(char *)"arrayElement"}, - {(char *)"has"}, - {(char *)"indexOf"}, - {(char *)"countEqual"}, - {(char *)"arrayEnumerate"}, - {(char *)"arrayEnumerateUniq"}, - {(char *)"arrayPopBack"}, - {(char *)"arrayPopFront"}, - {(char *)"arrayPushBack"}, - {(char *)"arrayPushFront"}, - {(char *)"arraySlice"}, - {(char *)"arrayUniq"}, - {(char *)"arrayJoin"}, - {(char *)"splitByChar"}, - {(char *)"splitByString"}, - {(char *)"arrayStringConcat"}, - {(char *)"alphaTokens"}, - {(char *)"bitAnd"}, - {(char *)"bitOr"}, - {(char *)"bitXor"}, - {(char *)"bitNot"}, - {(char *)"bitShiftLeft"}, - {(char *)"bitShiftRight"}, - {(char *)"halfMD5"}, - {(char *)"MD5"}, - {(char *)"sipHash64"}, - {(char *)"sipHash128"}, - {(char *)"cityHash64"}, - {(char *)"intHash32"}, - {(char *)"intHash64"}, - {(char *)"SHA1"}, - {(char *)"SHA224"}, - {(char *)"SHA256"}, - {(char *)"URLHash"}, - {(char *)"rand"}, - {(char *)"rand64"}, - {(char *)"hex"}, - {(char *)"unhex"}, - {(char *)"UUIDStringToNum"}, - {(char *)"UUIDNumToString"}, - {(char *)"bitmaskToList"}, - {(char *)"bitmaskToArray"}, - {(char *)"protocol"}, - {(char *)"domain"}, - {(char *)"domainWithoutWWW"}, - {(char *)"topLevelDomain"}, - {(char *)"firstSignificantSubdomain"}, - {(char *)"cutToFirstSignificantSubdomain"}, - {(char *)"path"}, - {(char *)"pathFull"}, - {(char *)"queryString"}, - {(char *)"fragment"}, - {(char *)"queryStringAndFragment"}, - {(char *)"extractURLParameter"}, - {(char 
*)"extractURLParameters"}, - {(char *)"extractURLParameterNames"}, - {(char *)"URLHierarchy"}, - {(char *)"URLPathHierarchy"}, - {(char *)"decodeURLComponent"}, - {(char *)"cutWWW"}, - {(char *)"cutQueryString"}, - {(char *)"cutFragment"}, - {(char *)"cutQueryStringAndFragment"}, - {(char *)"cutURLParameter"}, - {(char *)"IPv4NumToString"}, - {(char *)"IPv4StringToNum"}, - {(char *)"IPv6NumToString"}, - {(char *)"IPv6StringToNum"}, - {(char *)"visitParamHas"}, - {(char *)"visitParamExtractUInt"}, - {(char *)"visitParamExtractInt"}, - {(char *)"visitParamExtractFloat"}, - {(char *)"visitParamExtractBool"}, - {(char *)"visitParamExtractRaw"}, - {(char *)"visitParamExtractString"}, - {(char *)"arrayMap"}, - {(char *)"arrayFilter"}, - {(char *)"arrayCount"}, - {(char *)"arrayExists"}, - {(char *)"arrayAll"}, - {(char *)"arraySum"}, - {(char *)"arrayFirst"}, - {(char *)"arrayFirstIndex"}, - {(char *)"arrayCumSum"}, - {(char *)"arraySort"}, - {(char *)"arrayReverseSort"}, - {(char *)"hostName"}, - {(char *)"visibleWidth"}, - {(char *)"toTypeName"}, - {(char *)"blockSize"}, - {(char *)"materialize"}, - {(char *)"ignore"}, - {(char *)"sleep"}, - {(char *)"currentDatabase"}, - {(char *)"isFinite"}, - {(char *)"isInfinite"}, - {(char *)"isNaN"}, - {(char *)"hasColumnInTable"}, - {(char *)"bar"}, - {(char *)"transform"}, - {(char *)"formatReadableSize"}, - {(char *)"least"}, - {(char *)"greatest"}, - {(char *)"uptime"}, - {(char *)"version"}, - {(char *)"rowNumberInAllBlocks"}, - {(char *)"runningDifference"}, - {(char *)"MACNumToString"}, - {(char *)"MACStringToNum"}, - {(char *)"MACStringToOUI"}, - {(char *)"dictGetUInt8"}, - {(char *)"dictGetUInt16"}, - {(char *)"dictGetUInt32"}, - {(char *)"dictGetUInt64"}, - {(char *)"dictGetInt8"}, - {(char *)"dictGetInt16"}, - {(char *)"dictGetInt32"}, - {(char *)"dictGetInt64"}, - {(char *)"dictGetFloat32"}, - {(char *)"dictGetFloat64"}, - {(char *)"dictGetDate"}, - {(char *)"dictGetDateTime"}, - {(char *)"dictGetUUID"}, - {(char *)"dictGetString"}, - {(char *)"dictGetTOrDefault"}, - {(char *)"dictIsIn"}, - {(char *)"dictGetHierarchy"}, - {(char *)"dictHas"}, - {(char *)"regionToCity"}, - {(char *)"regionToArea"}, - {(char *)"regionToDistrict"}, - {(char *)"regionToCountry"}, - {(char *)"regionToContinent"}, - {(char *)"regionToPopulation"}, - {(char *)"regionIn"}, - {(char *)"regionHierarchy"}, - {(char *)"regionToName"}, - {(char *)"globalIn"}, - {(char *)"in"}, - {(char *)"notIn"}, - {(char *)"globalNotIn"}, - {(char *)"tuple"}, - {(char *)"tupleElement"}, - {(char *)"count"}, - {(char *)"any"}, - {(char *)"anyHeavy"}, - {(char *)"anyLast"}, - {(char *)"min"}, - {(char *)"max"}, - {(char *)"argMin"}, - {(char *)"argMax"}, - {(char *)"sum"}, - {(char *)"sumWithOverflow"}, - {(char *)"sumMap"}, - {(char *)"avg"}, - {(char *)"uniq"}, - {(char *)"uniqCombined"}, - {(char *)"uniqHLL12"}, - {(char *)"uniqExact"}, - {(char *)"groupArray"}, - {(char *)"groupArrayInsertAt"}, - {(char *)"groupUniqArray"}, - {(char *)"quantile"}, - {(char *)"quantileDeterministic"}, - {(char *)"quantileTiming"}, - {(char *)"quantileTimingWeighted"}, - {(char *)"quantileExact"}, - {(char *)"quantileExactWeighted"}, - {(char *)"quantileTDigest"}, - {(char *)"median"}, - {(char *)"quantiles"}, - {(char *)"varSamp"}, - {(char *)"varPop"}, - {(char *)"stddevSamp"}, - {(char *)"stddevPop"}, - {(char *)"topK"}, - {(char *)"covarSamp"}, - {(char *)"covarPop"}, - {(char *)"corr"}, - {(char *)"sequenceMatch"}, - {(char *)"sequenceCount"}, - {(char *)"windowFunnel"}, - {(char *)"uniqUpTo"}, 
//END OF LIST {(char *)nullptr}, }; diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 939d0cf2af7..f054e4538c5 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -57,7 +57,7 @@ #include #include -static Completion::HashTable ht; +static Completion::TSTNode completionNode; char ** query_parts_completion(const char *, int, int); char * query_parts_generator(const char *, int); @@ -340,12 +340,11 @@ private: || (now.month() == 1 && now.day() <= 5); } - void init_suggestions(Completion::HashTable *ht) + void init_suggestions() { - Completion::init_hash_table(ht, 128); QUERYPART *qP = queryParts; while (qP->name) { - Completion::hash_add_word(ht, qP->name); + completionNode.add_word(qP->name); qP++; } rl_attempted_completion_function = query_parts_completion; @@ -462,7 +461,7 @@ private: if (print_time_to_stderr) throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS); #if USE_READLINE - init_suggestions(&ht); + init_suggestions(); #endif /// Turn tab completion off. @@ -489,9 +488,6 @@ private: } loop(); -#if USE_READLINE - Completion::hash_free(&ht); -#endif std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl; @@ -1580,19 +1576,15 @@ char ** query_parts_completion(const char *text, int start, int end) char * query_parts_generator(const char *text, int state) { static int text_length; - static Completion::Bucket *bucket; - static Completion::HashEntry *entry; + static Completion::Entry *entry; char * found; if (!state) text_length = (uint) strlen(text); if (text_length > 0) { if (!state) { - uint length; - - bucket = Completion::hash_find_all_matches(&ht, text, (uint)strlen(text), &length); - if (!bucket) return (char *) nullptr; - entry = bucket->entry; + entry = completionNode.find_all(text); + if (!entry) return (char *) nullptr; } if (entry) { found = strdup(entry->text); From 9b32d1ea6677b8d9b3e957c9364fe28f24816f73 Mon Sep 17 00:00:00 2001 From: morty Date: Mon, 4 Jun 2018 17:50:45 +0300 Subject: [PATCH 027/192] Removed unneeded inclusion of source and header --- dbms/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index dd3217123a6..813c63109fb 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -32,7 +32,7 @@ find_package (Threads) add_subdirectory (src) set(dbms_headers) -set(dbms_sources src/Client/Completion.h src/Client/Completion.cpp src/Client/QueryParts.h) +set(dbms_sources) include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake) From 2479453aa52a536a23359c66886dc56101af07c6 Mon Sep 17 00:00:00 2001 From: morty Date: Mon, 4 Jun 2018 17:59:24 +0300 Subject: [PATCH 028/192] Free memory before exit --- dbms/src/Client/Completion.cpp | 2 +- dbms/src/Server/Client.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Client/Completion.cpp b/dbms/src/Client/Completion.cpp index bf976dc1f78..2d7489613c8 100644 --- a/dbms/src/Client/Completion.cpp +++ b/dbms/src/Client/Completion.cpp @@ -32,7 +32,7 @@ namespace Completion } auto newEntry = (Entry *) calloc(1, sizeof(Entry)); - newEntry->text = word; + newEntry->text = strdup(word); newEntry->next = entry; entry = newEntry; diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index f054e4538c5..9af4b3166a0 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -489,6 +489,9 @@ private: loop(); +#if USE_READLINE + completionNode.free(); +#endif std::cout << 
(isNewYearMode() ? "Happy new year." : "Bye.") << std::endl;

         return 0;

From fba5fdaf64c3eb9245c9ba8429b93a2567c705e5 Mon Sep 17 00:00:00 2001
From: morty
Date: Wed, 6 Jun 2018 19:37:04 +0300
Subject: [PATCH 029/192] Added autoload and limit

---
 dbms/src/Client/Completion.cpp |  33 +++-
 dbms/src/Client/Completion.h   |   9 +-
 dbms/src/Client/QueryParts.h   | 302 ++++++++++++++++-----------------
 dbms/src/Server/Client.cpp     | 122 +++++++++++--
 4 files changed, 295 insertions(+), 171 deletions(-)

diff --git a/dbms/src/Client/Completion.cpp b/dbms/src/Client/Completion.cpp
index 2d7489613c8..2a862d19413 100644
--- a/dbms/src/Client/Completion.cpp
+++ b/dbms/src/Client/Completion.cpp
@@ -1,13 +1,15 @@
 #include "Completion.h"

+#include
+
 namespace Completion
 {
-    void TSTNode::add_word(char *word)
+    void TSTNode::add_word(const char *word)
     {
         insert(word, word);
     }

-    void TSTNode::insert(char *word, char *remainder)
+    void TSTNode::insert(const char *word, const char *remainder)
     {
         if (!*remainder) {
             return;
@@ -79,6 +81,33 @@ namespace Completion
         return middle->find(word, ++remainder);
     }

+    bool TSTNode::has(const char *word)
+    {
+        if (!*word) {
+            // The whole word has been matched.
+            return true;
+        }
+
+        if (token > *word) {
+            if (!left) {
+                return false;
+            }
+            return left->has(word);
+        }
+
+        if (token < *word) {
+            if (!right) {
+                return false;
+            }
+            return right->has(word);
+        }
+
+        if (!middle) {
+            return false;
+        }
+
+        return middle->has(++word);
+    }
+
     void TSTNode::free()
     {
         if (left) {
diff --git a/dbms/src/Client/Completion.h b/dbms/src/Client/Completion.h
index d82c6506f44..2692a546da0 100644
--- a/dbms/src/Client/Completion.h
+++ b/dbms/src/Client/Completion.h
@@ -1,9 +1,7 @@
 #ifndef CLICKHOUSE_COMPLETION_H
 #define CLICKHOUSE_COMPLETION_H

-#include
-#include
-#include
+#include

 namespace Completion
 {
@@ -28,11 +26,12 @@ private:
     TSTNode *middle;
     char token;
     Entry *entry;
-    void insert(char *word, char *remainder);
+    void insert(const char *word, const char *remainder);
     Entry * find(const char *word, const char *remainder);
 public:
-    void add_word(char *word);
+    void add_word(const char *word);
     Entry * find_all(const char *word);
+    bool has(const char *word);
     void free();
 };

diff --git a/dbms/src/Client/QueryParts.h b/dbms/src/Client/QueryParts.h
index 9ad2b2451f4..bb9a48c7187 100644
--- a/dbms/src/Client/QueryParts.h
+++ b/dbms/src/Client/QueryParts.h
@@ -2,179 +2,179 @@
 #define CLICKHOUSE_QUERY_PARTS_H

 typedef struct {
-    char *name; /* User printable name of the query part. */
+    const char *name; /* User printable name of the query part.
*/ } QUERYPART; QUERYPART queryParts[] = { // CREATE DATABASE, TABLE, VIEW - {(char *)"CREATE"}, - {(char *)"DATABASE"}, - {(char *)"IF"}, - {(char *)"NOT"}, - {(char *)"EXISTS"}, - {(char *)"TEMPORARY"}, - {(char *)"TABLE"}, - {(char *)"ON"}, - {(char *)"CLUSTER"}, - {(char *)"DEFAULT"}, - {(char *)"MATERIALIZED"}, - {(char *)"ALIAS"}, - {(char *)"ENGINE"}, - {(char *)"AS"}, - {(char *)"VIEW"}, - {(char *)"POPULATE"}, + {(const char *)"CREATE"}, + {(const char *)"DATABASE"}, + {(const char *)"IF"}, + {(const char *)"NOT"}, + {(const char *)"EXISTS"}, + {(const char *)"TEMPORARY"}, + {(const char *)"TABLE"}, + {(const char *)"ON"}, + {(const char *)"CLUSTER"}, + {(const char *)"DEFAULT"}, + {(const char *)"MATERIALIZED"}, + {(const char *)"ALIAS"}, + {(const char *)"ENGINE"}, + {(const char *)"AS"}, + {(const char *)"VIEW"}, + {(const char *)"POPULATE"}, //ATTACH/DETACH - {(char *)"ATTACH"}, - {(char *)"DETACH"}, + {(const char *)"ATTACH"}, + {(const char *)"DETACH"}, //DROP - {(char *)"DROP"}, + {(const char *)"DROP"}, //RENAME - {(char *)"RENAME"}, - {(char *)"TO"}, + {(const char *)"RENAME"}, + {(const char *)"TO"}, //ALTER - {(char *)"ALTER"}, - {(char *)"ADD"}, - {(char *)"MODIFY"}, - {(char *)"COLUMN"}, - {(char *)"AFTER"}, + {(const char *)"ALTER"}, + {(const char *)"ADD"}, + {(const char *)"MODIFY"}, + {(const char *)"COLUMN"}, + {(const char *)"AFTER"}, //PARTITIONS - {(char *)"PARTITION"}, - {(char *)"PART"}, - {(char *)"FREEZE"}, - {(char *)"FETCH"}, - {(char *)"FROM"}, + {(const char *)"PARTITION"}, + {(const char *)"PART"}, + {(const char *)"FREEZE"}, + {(const char *)"FETCH"}, + {(const char *)"FROM"}, //SHOW DATABASES,TABLES,PROCESSLIST - {(char *)"SHOW"}, - {(char *)"INTO"}, - {(char *)"OUTFILE"}, - {(char *)"FORMAT"}, - {(char *)"TABLES"}, - {(char *)"LIKE"}, - {(char *)"PROCESSLIST"}, + {(const char *)"SHOW"}, + {(const char *)"INTO"}, + {(const char *)"OUTFILE"}, + {(const char *)"FORMAT"}, + {(const char *)"TABLES"}, + {(const char *)"LIKE"}, + {(const char *)"PROCESSLIST"}, //DESCRIBE - {(char *)"DESCRIBE"}, - {(char *)"DESC"}, + {(const char *)"DESCRIBE"}, + {(const char *)"DESC"}, //USE - {(char *)"USE"}, + {(const char *)"USE"}, //SET - {(char *)"SET"}, + {(const char *)"SET"}, //OPTIMIZE - {(char *)"OPTIMIZE"}, - {(char *)"FINAL"}, + {(const char *)"OPTIMIZE"}, + {(const char *)"FINAL"}, //INSERT - {(char *)"INSERT"}, - {(char *)"VALUES"}, + {(const char *)"INSERT"}, + {(const char *)"VALUES"}, //SELECT - {(char *)"SELECT"}, - {(char *)"DISTINCT"}, - {(char *)"SAMPLE"}, - {(char *)"ARRAY"}, - {(char *)"JOIN"}, - {(char *)"GLOBAL"}, - {(char *)"ANY"}, - {(char *)"ALL"}, - {(char *)"INNER"}, - {(char *)"LEFT"}, - {(char *)"USING"}, - {(char *)"PREWHERE"}, - {(char *)"WHERE"}, - {(char *)"GROUP"}, - {(char *)"BY"}, - {(char *)"WITH"}, - {(char *)"TOTALS"}, - {(char *)"HAVING"}, - {(char *)"ORDER"}, - {(char *)"LIMIT"}, - {(char *)"UNION"}, - {(char *)"AND"}, - {(char *)"OR"}, - {(char *)"ASC"}, + {(const char *)"SELECT"}, + {(const char *)"DISTINCT"}, + {(const char *)"SAMPLE"}, + {(const char *)"ARRAY"}, + {(const char *)"JOIN"}, + {(const char *)"GLOBAL"}, + {(const char *)"ANY"}, + {(const char *)"ALL"}, + {(const char *)"INNER"}, + {(const char *)"LEFT"}, + {(const char *)"USING"}, + {(const char *)"PREWHERE"}, + {(const char *)"WHERE"}, + {(const char *)"GROUP"}, + {(const char *)"BY"}, + {(const char *)"WITH"}, + {(const char *)"TOTALS"}, + {(const char *)"HAVING"}, + {(const char *)"ORDER"}, + {(const char *)"LIMIT"}, + {(const char *)"UNION"}, + {(const char 
*)"AND"}, + {(const char *)"OR"}, + {(const char *)"ASC"}, //IN - {(char *)"IN"}, + {(const char *)"IN"}, //KILL QUERY - {(char *)"KILL"}, - {(char *)"QUERY"}, - {(char *)"SYNC"}, - {(char *)"ASYNC"}, - {(char *)"TEST"}, + {(const char *)"KILL"}, + {(const char *)"QUERY"}, + {(const char *)"SYNC"}, + {(const char *)"ASYNC"}, + {(const char *)"TEST"}, //Table engines - {(char *)"TinyLog"}, - {(char *)"Log"}, - {(char *)"Memory"}, - {(char *)"MergeTree"}, - {(char *)"ReplacingMergeTree"}, - {(char *)"SummingMergeTree"}, - {(char *)"AggregateFunction"}, - {(char *)"CollapsingMergeTree"}, - {(char *)"GraphiteMergeTree"}, - {(char *)"ReplicatedMergeTree"}, - {(char *)"ReplicatedSummingMergeTree"}, - {(char *)"ReplicatedReplacingMergeTree"}, - {(char *)"ReplicatedAggregatingMergeTree"}, - {(char *)"ReplicatedCollapsingMergeTree"}, - {(char *)"ReplicatedGraphiteMergeTree"}, - {(char *)"Distributed"}, - {(char *)"Dictionary"}, - {(char *)"Merge"}, - {(char *)"Buffer"}, - {(char *)"File"}, - {(char *)"Null"}, - {(char *)"Set"}, - {(char *)"Join"}, - {(char *)"View"}, - {(char *)"MaterializedView"}, - {(char *)"Kafka"}, - {(char *)"MySQL"}, + {(const char *)"TinyLog"}, + {(const char *)"Log"}, + {(const char *)"Memory"}, + {(const char *)"MergeTree"}, + {(const char *)"ReplacingMergeTree"}, + {(const char *)"SummingMergeTree"}, + {(const char *)"AggregateFunction"}, + {(const char *)"CollapsingMergeTree"}, + {(const char *)"GraphiteMergeTree"}, + {(const char *)"ReplicatedMergeTree"}, + {(const char *)"ReplicatedSummingMergeTree"}, + {(const char *)"ReplicatedReplacingMergeTree"}, + {(const char *)"ReplicatedAggregatingMergeTree"}, + {(const char *)"ReplicatedCollapsingMergeTree"}, + {(const char *)"ReplicatedGraphiteMergeTree"}, + {(const char *)"Distributed"}, + {(const char *)"Dictionary"}, + {(const char *)"Merge"}, + {(const char *)"Buffer"}, + {(const char *)"File"}, + {(const char *)"Null"}, + {(const char *)"Set"}, + {(const char *)"Join"}, + {(const char *)"View"}, + {(const char *)"MaterializedView"}, + {(const char *)"Kafka"}, + {(const char *)"MySQL"}, //FORMATS - {(char *)"TabSeparated"}, - {(char *)"TabSeparatedRaw"}, - {(char *)"TabSeparatedWithNames"}, - {(char *)"TSVWithNames"}, - {(char *)"TabSeparatedWithNamesAndTypes"}, - {(char *)"TSVWithNamesAndTypes"}, - {(char *)"CSV"}, - {(char *)"CSVWithNames"}, - {(char *)"Values"}, - {(char *)"Vertical"}, - {(char *)"VerticalRaw"}, - {(char *)"JSON"}, - {(char *)"JSONCompact"}, - {(char *)"JSONEachRow"}, - {(char *)"TSKV"}, - {(char *)"Pretty"}, - {(char *)"PrettyCompact"}, - {(char *)"PrettyCompactMonoBlock"}, - {(char *)"PrettyNoEscapes"}, - {(char *)"PrettySpace"}, - {(char *)"RowBinary"}, - {(char *)"Native"}, - {(char *)"XML"}, - {(char *)"CapnProto"}, + {(const char *)"TabSeparated"}, + {(const char *)"TabSeparatedRaw"}, + {(const char *)"TabSeparatedWithNames"}, + {(const char *)"TSVWithNames"}, + {(const char *)"TabSeparatedWithNamesAndTypes"}, + {(const char *)"TSVWithNamesAndTypes"}, + {(const char *)"CSV"}, + {(const char *)"CSVWithNames"}, + {(const char *)"Values"}, + {(const char *)"Vertical"}, + {(const char *)"VerticalRaw"}, + {(const char *)"JSON"}, + {(const char *)"JSONCompact"}, + {(const char *)"JSONEachRow"}, + {(const char *)"TSKV"}, + {(const char *)"Pretty"}, + {(const char *)"PrettyCompact"}, + {(const char *)"PrettyCompactMonoBlock"}, + {(const char *)"PrettyNoEscapes"}, + {(const char *)"PrettySpace"}, + {(const char *)"RowBinary"}, + {(const char *)"Native"}, + {(const char *)"XML"}, + {(const char 
*)"CapnProto"}, //TYPES - {(char *)"Int8"}, - {(char *)"Int16"}, - {(char *)"Int32"}, - {(char *)"Int64"}, - {(char *)"UInt8"}, - {(char *)"UInt16"}, - {(char *)"UInt32"}, - {(char *)"UInt64"}, - {(char *)"Float32"}, - {(char *)"Float64"}, - {(char *)"Boolean"}, - {(char *)"String"}, - {(char *)"FixedString"}, - {(char *)"Date"}, - {(char *)"DateTime"}, - {(char *)"Enum8"}, - {(char *)"Enum16"}, - {(char *)"Array"}, - {(char *)"Tuple"}, - {(char *)"Nested"}, - {(char *)"Expression"}, - {(char *)"Set"}, + {(const char *)"Int8"}, + {(const char *)"Int16"}, + {(const char *)"Int32"}, + {(const char *)"Int64"}, + {(const char *)"UInt8"}, + {(const char *)"UInt16"}, + {(const char *)"UInt32"}, + {(const char *)"UInt64"}, + {(const char *)"Float32"}, + {(const char *)"Float64"}, + {(const char *)"Boolean"}, + {(const char *)"String"}, + {(const char *)"FixedString"}, + {(const char *)"Date"}, + {(const char *)"DateTime"}, + {(const char *)"Enum8"}, + {(const char *)"Enum16"}, + {(const char *)"Array"}, + {(const char *)"Tuple"}, + {(const char *)"Nested"}, + {(const char *)"Expression"}, + {(const char *)"Set"}, //END OF LIST - {(char *)nullptr}, + {(const char *)nullptr}, }; #endif //CLICKHOUSE_QUERY_PARTS_H diff --git a/dbms/src/Server/Client.cpp b/dbms/src/Server/Client.cpp index 9af4b3166a0..c0dc4b88cec 100644 --- a/dbms/src/Server/Client.cpp +++ b/dbms/src/Server/Client.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -91,12 +92,79 @@ namespace ErrorCodes extern const int CLIENT_OUTPUT_FORMAT_SPECIFIED; } - class Client : public Poco::Util::Application { public: Client() {} + void init_suggestions(Completion::TSTNode &node) + { + QUERYPART *qP = queryParts; + while (qP->name) { + node.add_word(qP->name); + qP++; + } + + std::vector blocks; + + //preload all functions + sendQuery("SELECT name FROM system.functions", blocks); + for (const auto &block : blocks) { + size_t size = block.rows(); + for (size_t i = size; i--;) { + node.add_word(strdup(block.getByName("name").column->getDataAt(i).toString().c_str())); + } + } + blocks.clear(); + + int limit = config().getInt("suggestion_limit", suggestion_limit); + + //preload limited amount of dbs + auto *query = new char[128]; + sprintf(query, "SELECT name FROM system.databases ORDER BY name LIMIT %d", limit); + sendQuery(query, blocks); + for (const auto &block : blocks) { + size_t size = block.rows(); + for (size_t i = size; i--;) { + node.add_word(strdup(block.getByName("name").column->getDataAt(i).toString().c_str())); + } + } + blocks.clear(); + delete query; + + //preload limited amount of tables and their columns + query = new char[128]; + sprintf( + query, + "SELECT table,name FROM system.columns WHERE table IN (SELECT name FROM system.tables ORDER BY name" + " LIMIT %d)", + limit + ); + sendQuery(query, blocks); + for (const auto &block : blocks) { + ColumnWithTypeAndName tableNameColumn = block.getByName("table"); + ColumnWithTypeAndName columnNameColumn = block.getByName("name"); + size_t size = block.rows(); + for (size_t i = size; i--;) { + const char *tableName = strdup(tableNameColumn.column->getDataAt(i).toString().c_str()); + const char *columnName = strdup(columnNameColumn.column->getDataAt(i).toString().c_str()); + auto *implodedName = new char[strlen(tableName)+strlen(columnName)+2]; + sprintf(implodedName, "%s.%s", tableName, columnName); + if (!node.has(tableName)) { + node.add_word(tableName); + } + if (!node.has(columnName)) { + node.add_word(columnName); + } + if 
(!node.has(implodedName)) {
+                    node.add_word(implodedName);
+                }
+                delete[] implodedName;
+            }
+        }
+        delete[] query;
+    }

 private:
     using StringSet = std::unordered_set;
     StringSet exit_strings
@@ -175,6 +243,9 @@
     /// External tables info.
     std::list external_tables;

+    /// Suggestion limit for how many databases and tables to fetch
+    int suggestion_limit = 100;
+

     struct ConnectionParameters
     {
@@ -340,17 +411,6 @@
             || (now.month() == 1 && now.day() <= 5);
     }

-    void init_suggestions()
-    {
-        QUERYPART *qP = queryParts;
-        while (qP->name) {
-            completionNode.add_word(qP->name);
-            qP++;
-        }
-        rl_attempted_completion_function = query_parts_completion;
-    }
-
-
     int mainImpl()
     {
         registerFunctions();
@@ -461,7 +521,9 @@
             if (print_time_to_stderr)
                 throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
 #if USE_READLINE
-            init_suggestions();
+            rl_attempted_completion_function = query_parts_completion;
+            init_suggestions(completionNode);
+//            std::thread tSuggestionInit(&DB::Client::init_suggestions, this, &completionNode);
 #endif

             /// Turn tab completion off.
//            rl_bind_key('\t', rl_insert);
@@ -1421,6 +1483,7 @@
             ("query_id", boost::program_options::value(), "query_id")
             ("query,q", boost::program_options::value(), "query")
             ("database,d", boost::program_options::value(), "database")
+            ("suggestion_limit", boost::program_options::value()->default_value(suggestion_limit), "Limit number of tables for suggestion")
             ("pager", boost::program_options::value(), "pager")
             ("multiline,m", "multiline")
             ("multiquery,n", "multiquery")
@@ -1548,6 +1611,39 @@
             max_client_network_bandwidth = options["max_client_network_bandwidth"].as();
         if (options.count("compression"))
             config().setBool("compression", options["compression"].as());
+        if (options.count("suggestion_limit"))
+            config().setInt("suggestion_limit", options["suggestion_limit"].as());
+
+
     }
+
+    void sendQuery(const String &query_, std::vector &blockVector)
+    {
+        connection->sendQuery(
+            query_,
+            query_id,
+            QueryProcessingStage::Complete,
+            &context.getSettingsRef(),
+            nullptr,
+            true
+        );
+
+        bool continueReceiving = true;
+        connection->forceConnected();
+        do {
+            try {
+                Connection::Packet packet = connection->receivePacket();
+                continueReceiving = packet.type != Protocol::Server::EndOfStream
+                    && packet.type != Protocol::Server::Exception;
+                if (!packet.block || !packet.block.rows()) {
+                    continue;
+                }
+
+                blockVector.emplace_back(packet.block);
+            } catch (...) {
+                continueReceiving = false;
+            }
+        } while (continueReceiving);
+    }
 };

From 3e14bdddb08b267065f4669cad4efb28dfc65579 Mon Sep 17 00:00:00 2001
From: morty
Date: Wed, 6 Jun 2018 21:19:17 +0300
Subject: [PATCH 030/192] Load suggestions in different thread

---
 dbms/programs/client/Client.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index e27a9970abe..8e05a837e3f 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -9,7 +9,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -522,8 +521,7 @@ private:
                 throw Exception("time option could be specified only in non-interactive mode", ErrorCodes::BAD_ARGUMENTS);
 #if USE_READLINE
             rl_attempted_completion_function = query_parts_completion;
-            init_suggestions(completionNode);
-//            std::thread tSuggestionInit(&DB::Client::init_suggestions, this, &completionNode);
+            std::thread tS([=] {init_suggestions(completionNode);});
 #endif

             /// Turn tab completion off.
@@ -1702,3 +1700,4 @@ char * query_parts_generator(const char *text, int state) return (char *) nullptr; } + From ec10e58df2f2a9039ce992b5e1f19b2f7fccf443 Mon Sep 17 00:00:00 2001 From: morty Date: Thu, 7 Jun 2018 11:58:55 +0300 Subject: [PATCH 031/192] Turn off autocompletion if no readline library --- dbms/programs/client/Client.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 8e05a837e3f..a0e458dbd23 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -522,11 +522,10 @@ private: #if USE_READLINE rl_attempted_completion_function = query_parts_completion; std::thread tS([=] {init_suggestions(completionNode);}); -#endif - +#else /// Turn tab completion off. -// rl_bind_key('\t', rl_insert); - + rl_bind_key('\t', rl_insert); +#endif /// Load command history if present. if (config().has("history_file")) history_file = config().getString("history_file"); From 47c6dc1728f02b45ef3eaf4516f7675f3c8f651a Mon Sep 17 00:00:00 2001 From: morty Date: Tue, 19 Jun 2018 17:07:10 +0300 Subject: [PATCH 032/192] Added missing query keywords, preload dictionary names and attributes, fixes and cleanup --- dbms/programs/client/Client.cpp | 39 ++++++++++++++++++++++++++++----- dbms/src/Client/QueryParts.h | 35 ++++++++++++++++++++++++++++- 2 files changed, 68 insertions(+), 6 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index a0e458dbd23..d6a4554beb0 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -111,7 +111,9 @@ public: for (const auto &block : blocks) { size_t size = block.rows(); for (size_t i = size; i--;) { - node.add_word(strdup(block.getByName("name").column->getDataAt(i).toString().c_str())); + char *funcName = strdup(block.getByName("name").column->getDataAt(i).toString().c_str()); + node.add_word(funcName); + free(funcName); } } blocks.clear(); @@ -125,7 +127,9 @@ public: for (const auto &block : blocks) { size_t size = block.rows(); for (size_t i = size; i--;) { - node.add_word(strdup(block.getByName("name").column->getDataAt(i).toString().c_str())); + char *dbName = strdup(block.getByName("name").column->getDataAt(i).toString().c_str()); + node.add_word(dbName); + free(dbName); } } blocks.clear(); @@ -145,8 +149,8 @@ public: ColumnWithTypeAndName columnNameColumn = block.getByName("name"); size_t size = block.rows(); for (size_t i = size; i--;) { - const char *tableName = strdup(tableNameColumn.column->getDataAt(i).toString().c_str()); - const char *columnName = strdup(columnNameColumn.column->getDataAt(i).toString().c_str()); + char *tableName = strdup(tableNameColumn.column->getDataAt(i).toString().c_str()); + char *columnName = strdup(columnNameColumn.column->getDataAt(i).toString().c_str()); auto *implodedName = new char[strlen(tableName)+strlen(columnName)+2]; sprintf(implodedName, "%s.%s", tableName, columnName); if (!node.has(tableName)) { @@ -158,10 +162,34 @@ public: if (!node.has(implodedName)) { node.add_word(implodedName); } + free(tableName); + free(columnName); delete implodedName; } } + blocks.clear(); delete query; + + //preload dictionaries names and attributes + sendQuery("SELECT name, attrName FROM system.dictionaries ARRAY JOIN attribute.names AS attrName", blocks); + for (const auto &block : blocks) { + ColumnWithTypeAndName dictNameColumn = block.getByName("name"); + ColumnWithTypeAndName dictAttrNamesColumn = block.getByName("attrName"); + size_t size = 
block.rows(); + for (size_t i = size; i--;) { + char *dictName = strdup(dictNameColumn.column->getDataAt(i).toString().c_str()); + char *attrName = strdup(dictAttrNamesColumn.column->getDataAt(i).toString().c_str()); + if (!node.has(dictName)) { + node.add_word(dictName); + } + if (!node.has(attrName)) { + node.add_word(attrName); + } + free(dictName); + free(attrName); + } + } + blocks.clear(); } private: @@ -243,7 +271,7 @@ private: std::list external_tables; /// Suggestion limit for how many databases and tables to fetch - int suggestion_limit = 100; + int suggestion_limit = 256; struct ConnectionParameters @@ -522,6 +550,7 @@ private: #if USE_READLINE rl_attempted_completion_function = query_parts_completion; std::thread tS([=] {init_suggestions(completionNode);}); + tS.detach(); #else /// Turn tab completion off. rl_bind_key('\t', rl_insert); diff --git a/dbms/src/Client/QueryParts.h b/dbms/src/Client/QueryParts.h index bb9a48c7187..91279b6cf44 100644 --- a/dbms/src/Client/QueryParts.h +++ b/dbms/src/Client/QueryParts.h @@ -23,6 +23,7 @@ QUERYPART queryParts[] = { {(const char *)"AS"}, {(const char *)"VIEW"}, {(const char *)"POPULATE"}, + {(const char *)"SETTINGS"}, //ATTACH/DETACH {(const char *)"ATTACH"}, {(const char *)"DETACH"}, @@ -35,8 +36,15 @@ QUERYPART queryParts[] = { {(const char *)"ALTER"}, {(const char *)"ADD"}, {(const char *)"MODIFY"}, + {(const char *)"CLEAR"}, {(const char *)"COLUMN"}, {(const char *)"AFTER"}, + {(const char *)"COPY"}, + {(const char *)"PROJECT"}, + {(const char *)"PRIMARY"}, + {(const char *)"KEY"}, + //CHECK + {(const char *)"CHECK"}, //PARTITIONS {(const char *)"PARTITION"}, {(const char *)"PART"}, @@ -49,8 +57,15 @@ QUERYPART queryParts[] = { {(const char *)"OUTFILE"}, {(const char *)"FORMAT"}, {(const char *)"TABLES"}, + {(const char *)"DATABASES"}, {(const char *)"LIKE"}, {(const char *)"PROCESSLIST"}, + //CONDITIONAL EXPRESSIONS + {(const char *)"CASE"}, + {(const char *)"WHEN"}, + {(const char *)"THEN"}, + {(const char *)"ELSE"}, + {(const char *)"END"}, //DESCRIBE {(const char *)"DESCRIBE"}, {(const char *)"DESC"}, @@ -61,6 +76,7 @@ QUERYPART queryParts[] = { //OPTIMIZE {(const char *)"OPTIMIZE"}, {(const char *)"FINAL"}, + {(const char *)"DEDUPLICATE"}, //INSERT {(const char *)"INSERT"}, {(const char *)"VALUES"}, @@ -71,10 +87,15 @@ QUERYPART queryParts[] = { {(const char *)"ARRAY"}, {(const char *)"JOIN"}, {(const char *)"GLOBAL"}, + {(const char *)"LOCAL"}, {(const char *)"ANY"}, {(const char *)"ALL"}, {(const char *)"INNER"}, {(const char *)"LEFT"}, + {(const char *)"RIGHT"}, + {(const char *)"FULL"}, + {(const char *)"OUTER"}, + {(const char *)"CROSS"}, {(const char *)"USING"}, {(const char *)"PREWHERE"}, {(const char *)"WHERE"}, @@ -84,11 +105,17 @@ QUERYPART queryParts[] = { {(const char *)"TOTALS"}, {(const char *)"HAVING"}, {(const char *)"ORDER"}, + {(const char *)"COLLATE"}, {(const char *)"LIMIT"}, {(const char *)"UNION"}, {(const char *)"AND"}, {(const char *)"OR"}, {(const char *)"ASC"}, + //TABLE FUNCTIONS + {(const char *)"file"}, + {(const char *)"merge"}, + {(const char *)"numbers"}, + {(const char *)"remote"}, //IN {(const char *)"IN"}, //KILL QUERY @@ -97,7 +124,7 @@ QUERYPART queryParts[] = { {(const char *)"SYNC"}, {(const char *)"ASYNC"}, {(const char *)"TEST"}, - //Table engines + //TABLE ENGINES {(const char *)"TinyLog"}, {(const char *)"Log"}, {(const char *)"Memory"}, @@ -150,6 +177,10 @@ QUERYPART queryParts[] = { {(const char *)"Native"}, {(const char *)"XML"}, {(const char *)"CapnProto"}, + {(const char 
*)"ODBCDriver"}, + {(const char *)"PrettyCompactNoEscapes"}, + {(const char *)"PrettySpaceNoEscapes"}, + {(const char *)"TSVRaw"}, //TYPES {(const char *)"Int8"}, {(const char *)"Int16"}, @@ -173,6 +204,8 @@ QUERYPART queryParts[] = { {(const char *)"Nested"}, {(const char *)"Expression"}, {(const char *)"Set"}, + {(const char *)"Nullable"}, + {(const char *)"tuple"}, //END OF LIST {(const char *)nullptr}, }; From 69f7948bbd9ba2b6623a0e668accbe2edf18dca4 Mon Sep 17 00:00:00 2001 From: morty Date: Thu, 2 Aug 2018 17:57:57 +0300 Subject: [PATCH 033/192] Use system tables for formats, engines, functions, collations, types --- dbms/programs/client/Client.cpp | 19 ++++++- dbms/src/Client/QueryParts.h | 87 --------------------------------- 2 files changed, 17 insertions(+), 89 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 2c8dc6c1c90..df6251419a5 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -204,8 +204,23 @@ public: std::vector blocks; - //preload all functions - sendQuery("SELECT name FROM system.functions", blocks); + //preload all functions, table engines, formats, table functions, data types, combinators, collations + sendQuery( + "SELECT name FROM system.functions" + " UNION ALL " + "SELECT name FROM system.table_engines" + " UNION ALL " + "SELECT name FROM system.formats" + " UNION ALL " + "SELECT name FROM system.table_functions" + " UNION ALL " + "SELECT name FROM system.data_type_families" + " UNION ALL " + "SELECT name FROM system.aggregate_function_combinators" + " UNION ALL " + "SELECT name FROM system.collations", + blocks + ); for (const auto &block : blocks) { size_t size = block.rows(); for (size_t i = size; i--;) { diff --git a/dbms/src/Client/QueryParts.h b/dbms/src/Client/QueryParts.h index 91279b6cf44..1885e500e34 100644 --- a/dbms/src/Client/QueryParts.h +++ b/dbms/src/Client/QueryParts.h @@ -111,11 +111,6 @@ QUERYPART queryParts[] = { {(const char *)"AND"}, {(const char *)"OR"}, {(const char *)"ASC"}, - //TABLE FUNCTIONS - {(const char *)"file"}, - {(const char *)"merge"}, - {(const char *)"numbers"}, - {(const char *)"remote"}, //IN {(const char *)"IN"}, //KILL QUERY @@ -124,88 +119,6 @@ QUERYPART queryParts[] = { {(const char *)"SYNC"}, {(const char *)"ASYNC"}, {(const char *)"TEST"}, - //TABLE ENGINES - {(const char *)"TinyLog"}, - {(const char *)"Log"}, - {(const char *)"Memory"}, - {(const char *)"MergeTree"}, - {(const char *)"ReplacingMergeTree"}, - {(const char *)"SummingMergeTree"}, - {(const char *)"AggregateFunction"}, - {(const char *)"CollapsingMergeTree"}, - {(const char *)"GraphiteMergeTree"}, - {(const char *)"ReplicatedMergeTree"}, - {(const char *)"ReplicatedSummingMergeTree"}, - {(const char *)"ReplicatedReplacingMergeTree"}, - {(const char *)"ReplicatedAggregatingMergeTree"}, - {(const char *)"ReplicatedCollapsingMergeTree"}, - {(const char *)"ReplicatedGraphiteMergeTree"}, - {(const char *)"Distributed"}, - {(const char *)"Dictionary"}, - {(const char *)"Merge"}, - {(const char *)"Buffer"}, - {(const char *)"File"}, - {(const char *)"Null"}, - {(const char *)"Set"}, - {(const char *)"Join"}, - {(const char *)"View"}, - {(const char *)"MaterializedView"}, - {(const char *)"Kafka"}, - {(const char *)"MySQL"}, - //FORMATS - {(const char *)"TabSeparated"}, - {(const char *)"TabSeparatedRaw"}, - {(const char *)"TabSeparatedWithNames"}, - {(const char *)"TSVWithNames"}, - {(const char *)"TabSeparatedWithNamesAndTypes"}, - {(const char *)"TSVWithNamesAndTypes"}, - 
{(const char *)"CSV"}, - {(const char *)"CSVWithNames"}, - {(const char *)"Values"}, - {(const char *)"Vertical"}, - {(const char *)"VerticalRaw"}, - {(const char *)"JSON"}, - {(const char *)"JSONCompact"}, - {(const char *)"JSONEachRow"}, - {(const char *)"TSKV"}, - {(const char *)"Pretty"}, - {(const char *)"PrettyCompact"}, - {(const char *)"PrettyCompactMonoBlock"}, - {(const char *)"PrettyNoEscapes"}, - {(const char *)"PrettySpace"}, - {(const char *)"RowBinary"}, - {(const char *)"Native"}, - {(const char *)"XML"}, - {(const char *)"CapnProto"}, - {(const char *)"ODBCDriver"}, - {(const char *)"PrettyCompactNoEscapes"}, - {(const char *)"PrettySpaceNoEscapes"}, - {(const char *)"TSVRaw"}, - //TYPES - {(const char *)"Int8"}, - {(const char *)"Int16"}, - {(const char *)"Int32"}, - {(const char *)"Int64"}, - {(const char *)"UInt8"}, - {(const char *)"UInt16"}, - {(const char *)"UInt32"}, - {(const char *)"UInt64"}, - {(const char *)"Float32"}, - {(const char *)"Float64"}, - {(const char *)"Boolean"}, - {(const char *)"String"}, - {(const char *)"FixedString"}, - {(const char *)"Date"}, - {(const char *)"DateTime"}, - {(const char *)"Enum8"}, - {(const char *)"Enum16"}, - {(const char *)"Array"}, - {(const char *)"Tuple"}, - {(const char *)"Nested"}, - {(const char *)"Expression"}, - {(const char *)"Set"}, - {(const char *)"Nullable"}, - {(const char *)"tuple"}, //END OF LIST {(const char *)nullptr}, }; From 11a7825ce71c39462dc95b39fd66315f1ef1e87b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Fri, 3 Aug 2018 23:48:43 +0300 Subject: [PATCH 034/192] Update Client.cpp --- dbms/programs/client/Client.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index df6251419a5..96775e5e110 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -204,7 +204,7 @@ public: std::vector blocks; - //preload all functions, table engines, formats, table functions, data types, combinators, collations + //preload all functions, table engines, formats, table functions, data types, combinators sendQuery( "SELECT name FROM system.functions" " UNION ALL " @@ -216,9 +216,7 @@ public: " UNION ALL " "SELECT name FROM system.data_type_families" " UNION ALL " - "SELECT name FROM system.aggregate_function_combinators" - " UNION ALL " - "SELECT name FROM system.collations", + "SELECT name FROM system.aggregate_function_combinators", blocks ); for (const auto &block : blocks) { From ce5810756874264e6d37c7ec530f4dd5b4f44573 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Wed, 15 Aug 2018 13:56:54 +0300 Subject: [PATCH 035/192] Update of round(x [, N]) description. --- .../functions/rounding_functions.md | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/query_language/functions/rounding_functions.md index a4fc9fa4a05..ed093fb07aa 100644 --- a/docs/ru/query_language/functions/rounding_functions.md +++ b/docs/ru/query_language/functions/rounding_functions.md @@ -17,10 +17,40 @@ N может быть отрицательным. В остальном, аналогично функции floor, см. выше. ## round(x\[, N\]) -Возвращает ближайшее к num круглое число, которое может быть меньше или больше или равно x. -Если x находится посередине от ближайших круглых чисел, то возвращается какое-либо одно из них (implementation specific). -Число -0. может считаться или не считаться круглым (implementation specific). 
-Otherwise, it is similar to the floor and ceil functions described above.
+
+Implements [banker's rounding](https://en.wikipedia.org/wiki/Rounding#Round_half_to_even), i.e. rounding to the nearest even number.
+
+**Input parameters**
+
+- `x` — the number to round. [Type](../../data_types/index.md#data_types) — any numeric type.
+- `N` — the position of the digit after the decimal point to round to.
+
+**Returned value**
+
+A rounded number of the same type as the input number `x`.
+
+**Example**
+
+```
+SELECT
+    number / 2 AS x,
+    round(x)
+FROM system.numbers
+LIMIT 10
+
+┌───x─┬─round(divide(number, 2))─┐
+│   0 │                        0 │
+│ 0.5 │                        0 │
+│   1 │                        1 │
+│ 1.5 │                        2 │
+│   2 │                        2 │
+│ 2.5 │                        2 │
+│   3 │                        3 │
+│ 3.5 │                        4 │
+│   4 │                        4 │
+│ 4.5 │                        4 │
+└─────┴──────────────────────────┘
+```

 ## roundToExp2(num)
 Takes a number. If the number is less than one, returns 0. Otherwise, rounds the number down to the nearest (non-negative integer) power of two.
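Editor's sketch: the `N` parameter documented above selects the decimal position at which rounding happens, and the half-to-even rule only comes into play at exact ties. The query below is illustrative only (not part of the patch); the expected results follow from the rules and the example table above.

```sql
SELECT
    round(3.141592, 2) AS pi_rounded, -- 3.14: no tie, ordinary rounding at the 2nd decimal
    round(2.5) AS tie_to_even         -- 2: an exact tie, rounded to the nearest even number
```
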
From 613c1669f212bc9c66f9d69a890ebb809cffc30d Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Wed, 15 Aug 2018 13:58:37 +0300
Subject: [PATCH 036/192] Update of file() table function description.

---
 .../ru/query_language/table_functions/file.md | 43 ++++++++++++++++---
 1 file changed, 36 insertions(+), 7 deletions(-)

diff --git a/docs/ru/query_language/table_functions/file.md b/docs/ru/query_language/table_functions/file.md
index f3e35c7f8f0..c080cb3cc3d 100644
--- a/docs/ru/query_language/table_functions/file.md
+++ b/docs/ru/query_language/table_functions/file.md
@@ -2,17 +2,46 @@

 # file

-`file(path, format, structure)` - возвращает таблицу со столбцами, указанными в structure, созданную из файла path типа format.
+Создает таблицу из файла.
+
+```
+file(path, format, structure)
+```

-path - относительный путь до файла от [user_files_path](../../operations/server_settings/settings.md#user_files_path).
+**Входные параметры**

-format - [формат](../../interfaces/formats.md#formats) файла.
+- `path` — относительный путь до файла от [user_files_path](../../operations/server_settings/settings.md#user_files_path).
+- `format` — [формат](../../interfaces/formats.md#formats) файла. Файл должен быть одного из форматов, которые доступны в запросе `INSERT`.
+- `structure` — структура таблицы. Формат `'column-1-name column-1-data-type, column-2-name column-2-data-type, ...'`.

-structure - структура таблицы в форме 'UserID UInt64, URL String'. Определяет имена и типы столбцов.
+**Возвращаемое значение**
+
+Таблица с указанной структурой.

 **Пример**

-```sql
--- получение первых 10 строк таблицы, состоящей из трёх колонок типа UInt32 из CSV файла
-SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10
-```
+Настройка `user_files_path` и содержимое файла `test.csv`.
+
+```bash
+$ grep user_files_path /etc/clickhouse-server/config.xml
+    /var/lib/clickhouse/user_files/
+
+$ cat /var/lib/clickhouse/user_files/test.csv
+    1,2,3
+    3,2,1
+    78,43,45
+```
+
+Таблица из `test.csv` и выборка первых двух строк из неё.
+
+```sql
+SELECT *
+FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
+LIMIT 2
+```
+```
+┌─column1─┬─column2─┬─column3─┐
+│       1 │       2 │       3 │
+│       3 │       2 │       1 │
+└─────────┴─────────┴─────────┘
+```

From 20f5606fb46bc67672d3700cd0f7c0d5625a0f11 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Thu, 16 Aug 2018 11:25:35 +0300
Subject: [PATCH 037/192] Correction and clarification of descriptions of
 file() and round().

---
 docs/ru/query_language/functions/rounding_functions.md | 5 +++--
 docs/ru/query_language/table_functions/file.md         | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/query_language/functions/rounding_functions.md
index ed093fb07aa..540e8ff4abe 100644
--- a/docs/ru/query_language/functions/rounding_functions.md
+++ b/docs/ru/query_language/functions/rounding_functions.md
@@ -31,13 +31,14 @@ N может быть отрицательным.

 **Пример**

-```
+```sql
 SELECT
     number / 2 AS x,
     round(x)
 FROM system.numbers
 LIMIT 10
-
+```
+```
 ┌───x─┬─round(divide(number, 2))─┐
 │   0 │                        0 │
 │ 0.5 │                        0 │

diff --git a/docs/ru/query_language/table_functions/file.md b/docs/ru/query_language/table_functions/file.md
index c080cb3cc3d..36338395d35 100644
--- a/docs/ru/query_language/table_functions/file.md
+++ b/docs/ru/query_language/table_functions/file.md
@@ -11,16 +11,16 @@ file(path, format, structure)

 **Входные параметры**

 - `path` — относительный путь до файла от [user_files_path](../../operations/server_settings/settings.md#user_files_path).
-- `format` — [формат](../../interfaces/formats.md#formats) файла. Файл должен быть одного из форматов, которые доступны в запросе `INSERT`.
+- `format` — [формат](../../interfaces/formats.md#formats) файла. Поддерживаются те же форматы, что и для запроса `INSERT`.
 - `structure` — структура таблицы. Формат `'column-1-name column-1-data-type, column-2-name column-2-data-type, ...'`.

 **Возвращаемое значение**

-Таблица с указанной структурой.
+Таблица с указанной структурой и данными из указанного файла.

 **Пример**

-Настройка `user_files_path` и содержимое файла `test.csv`.
+Настройка `user_files_path` и содержимое файла `test.csv`:

 ```bash
 $ grep user_files_path /etc/clickhouse-server/config.xml
     /var/lib/clickhouse/user_files/

 $ cat /var/lib/clickhouse/user_files/test.csv
     1,2,3
     3,2,1
     78,43,45
 ```

-Таблица из `test.csv` и выборка первых двух строк из неё.
+Таблица из `test.csv` и выборка первых двух строк из неё:

 ```sql
 SELECT *
 FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
 LIMIT 2

From a93d527b3426c7d5bfa83c2e46e27d4e5f415962 Mon Sep 17 00:00:00 2001
From: VadimPE
Date: Mon, 20 Aug 2018 11:06:47 +0300
Subject: [PATCH 038/192] CLICKHOUSE-3819 add CASE without ELSE

---
 dbms/src/Parsers/ParserCase.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Parsers/ParserCase.cpp b/dbms/src/Parsers/ParserCase.cpp
index c2193e98e8f..266a82e8214 100644
--- a/dbms/src/Parsers/ParserCase.cpp
+++ b/dbms/src/Parsers/ParserCase.cpp
@@ -50,8 +50,20 @@ bool ParserCase::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
     if (!has_branch)
         return false;

-    if (!s_else.ignore(pos, expected))
-        return false;
+    if (s_else.ignore(pos, expected))
+    {
+        ASTPtr expr_else;
+        if (!p_expr.parse(pos, expr_else, expected))
+            return false;
+        args.push_back(expr_else);
+    }
+    else
+    {
+        /// CASE without ELSE: push an implicit NULL literal for the missing branch.
+        args.push_back(std::make_shared<ASTLiteral>(Field()));
+    }
-    ASTPtr expr_else;
-    if (!p_expr.parse(pos, expr_else, expected))

From d18e2497280d6f56d4d6eaaff9c1af4967e0e1f4 Mon Sep 17 00:00:00 2001
From: BayoNet
Date: Tue, 21 Aug 2018 10:54:37 +0300
Subject: [PATCH 039/192] Updated description of MergeTree.
--- docs/ru/data_types/date.md | 2 + .../table_engines/collapsingmergetree.md | 4 +- docs/ru/operations/table_engines/merge.md | 2 + docs/ru/operations/table_engines/mergetree.md | 175 +++++++++++++++--- .../operations/table_engines/replication.md | 2 + .../table_engines/summingmergetree.md | 2 + docs/ru/query_language/select.md | 2 + 7 files changed, 158 insertions(+), 31 deletions(-) diff --git a/docs/ru/data_types/date.md b/docs/ru/data_types/date.md index ff9e40cc7ab..c458deaf627 100644 --- a/docs/ru/data_types/date.md +++ b/docs/ru/data_types/date.md @@ -1,3 +1,5 @@ + + # Date Дата. Хранится в двух байтах в виде (беззнакового) числа дней, прошедших от 1970-01-01. Позволяет хранить значения от чуть больше, чем начала unix-эпохи до верхнего порога, определяющегося константой на этапе компиляции (сейчас - до 2106 года, последний полностью поддерживаемый год - 2105). diff --git a/docs/ru/operations/table_engines/collapsingmergetree.md b/docs/ru/operations/table_engines/collapsingmergetree.md index 6336c0d3000..83dfaf99d72 100644 --- a/docs/ru/operations/table_engines/collapsingmergetree.md +++ b/docs/ru/operations/table_engines/collapsingmergetree.md @@ -1,3 +1,5 @@ + + # CollapsingMergeTree *Движок достаточно специфичен для Яндекс.Метрики.* @@ -33,5 +35,3 @@ CollapsingMergeTree(EventDate, (CounterID, EventDate, intHash32(UniqID), VisitID 1. Написать запрос с GROUP BY и агрегатными функциями, учитывающими знак. Например, чтобы посчитать количество, надо вместо count() написать sum(Sign); чтобы посчитать сумму чего-либо, надо вместо sum(x) написать sum(Sign \* x) и т. п., а также добавить HAVING sum(Sign) `>` 0. Не все величины можно посчитать подобным образом. Например, агрегатные функции min, max не могут быть переписаны. 2. Если необходимо вынимать данные без агрегации (например, проверить наличие строк, самые новые значения которых удовлетворяют некоторым условиям), можно использовать модификатор FINAL для секции FROM. Это вариант существенно менее эффективен. - - diff --git a/docs/ru/operations/table_engines/merge.md b/docs/ru/operations/table_engines/merge.md index aa5d44e71f5..7aa5ebd3348 100644 --- a/docs/ru/operations/table_engines/merge.md +++ b/docs/ru/operations/table_engines/merge.md @@ -1,3 +1,5 @@ + + # Merge Движок `Merge` (не путайте с движком `MergeTree`) не хранит данные самостоятельно, а позволяет читать одновременно из произвольного количества других таблиц. diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 62ea3dc2e2f..d9c239030d6 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -2,55 +2,175 @@ # MergeTree -Движок MergeTree поддерживает индекс по первичному ключу и по дате и обеспечивает возможность обновления данных в реальном времени. -Это наиболее продвинутый движок таблиц в ClickHouse. Не путайте с движком Merge. +Движок `MergeTree`, а также другие движки этого семейства (`*MergeTree`) — это наиболее функциональные движки таблиц ClickHousе. -Движок принимает параметры: имя столбца типа Date, содержащего дату; выражение для семплирования (не обязательно); кортеж, определяющий первичный ключ таблицы; гранулированность индекса. +!!!info + Движок [Merge](merge.md#table_engine-merge) не относится к семейству `*MergeTree`. -Пример без поддержки сэмплирования. +Основные возможности: -```text -MergeTree(EventDate, (CounterID, EventDate), 8192) +- Хранит данные, отсортированные по первичному ключу. 
+ + Это позволяет создавать разреженный индекс небольшого объёма, который позволяет быстрее находить данные. + +- Позволяет оперировать партициями, если задан [ключ партиционирования](custom_partitioning_key.md#table_engines-custom_partitioning_key). + + ClickHouse поддерживает отдельные операции с партициями, которые работают эффективнее, чем общие операции с этим же результатом над этими же данными. Также, ClickHouse автоматически отсекает данные по партициям там, где ключ партиционирования указан в запросе. Это также увеличивает эффективность выполнения запросов. + +- Поддерживает репликацию данных. + + Для этого требуется [преобразование](replication.md#convert-mergetree-to-replicated) `MergeTree` к `ReplicatedMergeTree`. Подробнее читайте в разделе [Репликация данных](replication.md#table_engines-replication). + +- Поддерживает сэмплирование данных. + + При необходимости можно задать способ сэмплирования данных в таблице. + + +!!!attention +При создании таблицы помните, что она обязательно должна содержать столбец с датой типа [Date](../../data_types/date.md#data_type-date). + +## Конфигурирование движка при создании таблицы + +``` +ENGINE [=] MergeTree() [PARTITION BY expr] [ORDER BY expr] [SAMPLE BY expr] [SETTINGS name=value, ...] ``` -Пример с поддержкой сэмплирования. +**Секции ENGINE** -```text +- `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md#table_engines-custom_partitioning_key). + + Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../data_types/date.md#data_type-date). В этом случае имена партиций имеют формат `"YYYYMM"`. + +- `ORDER BY` — первичный ключ. + + Тип — [Tuple()](../../data_types/tuple.md#data_type-tuple). Может состоять из произвольных выражений, но обычно это кортеж столбцов. Обязательно должен включать в себя выражение для сэмплирования, если оно задано. + +- `SAMPLE BY` — выражение для сэмплирования. +- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`: + + - `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. По умолчанию — 8192. + +**Пример** + +``` +ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity=8192 +``` + +В примере мы устанавливаем партиционирование по месяцам. + +Также мы задаем выражение для сэмплирования в виде хэша по идентификатору посетителя. Это позволяет псевдослучайным образом перемешать данные в таблице для каждого `CounterID` и `EventDate`. Если при выборке данных задать секцию [SAMPLE](../../query_language/select.md#select-section-sample) то ClickHouse вернёт равномерно-псевдослучайную выборку данных для подмножества посетителей. + +`index_granularity` можно было не указывать, поскольку 8192 — это значение по умолчанию. + +### Устаревший способ конфигурирования движка + +!!!attention + Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. + +``` +ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` + +**Параметры MergeTree()** + +- `date-column` — имя столбца с типом [Date](../../data_types/date.md#data_type-date). На основе этого столбца ClickHouse автоматически создаёт партиции по месяцам. Имена партиций имеют формат `"YYYYMM"`. +- `sampling_expression` — выражение для сэмплирования. +- `(primary, key)` — первичный ключ. 
Тип — [Tuple()](../../data_types/tuple.md#data_type-tuple). Может состоять из произвольных выражений, но обычно это кортеж столбцов. Обязательно должен включать в себя выражение для сэмплирования, если оно задано. Не обязан включать в себя столбец с датой `date-column`. +- `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. Для большинства задач подходит значение 8192. + +**Пример** + +``` MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)), 8192) ``` -В таблице типа MergeTree обязательно должен быть отдельный столбец, содержащий дату, здесь это столбец EventDate. Тип столбца с датой — обязательно Date (а не DateTime). +Движок `MergeTree` сконфигурирован таким же образом, как и в примере выше для основного способа конфигурирования движка. -Первичным ключом может быть кортеж из произвольных выражений (обычно это просто кортеж столбцов) или одно выражение. +## Хранение данных -Выражение для сэмплирования (использовать не обязательно) — произвольное выражение. Оно должно также присутствовать в первичном ключе. В примере используется хэширование по идентификатору посетителя, чтобы псевдослучайно перемешать данные в таблице для каждого CounterID и EventDate. То есть, при использовании секции SAMPLE в запросе вы получите равномерно-псевдослучайную выборку данных для подмножества посетителей. +Таблица хранится блоками данных, отсортированных по первичному ключу. Каждый блок маркируется максимальной и минимальной датами хранящихся в нём записей. -Таблица реализована как набор кусочков. Каждый кусочек сортирован по первичному ключу. Также для каждого кусочка прописана минимальная и максимальная дата. При вставке в таблицу создаётся новый сортированный кусочек. В фоне периодически инициируется процесс слияния. При слиянии выбирается несколько кусочков, обычно наименьших, и сливаются в один большой сортированный кусочек. +При вставке в таблицу создаются отдельные блоки данных, каждый из которых лексикографически отсортирован по первичному ключу. Например, если первичный ключ — `(CounterID, Date)`, то данные в блоке будут лежать в порядке `CounterID`, а для каждого `CounterID` в порядке `Date`. -То есть, при вставке в таблицу производится инкрементальная сортировка. Слияние реализовано таким образом, что таблица постоянно состоит из небольшого количества сортированных кусочков, а также само слияние делает не слишком много работы. +Данные, относящиеся к разным месяцам разбиваются на разные блоки. В дальнейшем ClickHouse в фоновом режиме объединяет мелкие блоки в более крупные для более эффективного хранения. Блоки, относящиеся к разным месяцам не объединяются, это локализует модификации и упрощает бэкапы. Поддерживается запрос `OPTIMIZE`, который вызывает один внеочередной шаг слияния. -При вставке данные, относящиеся к разным месяцам, разбиваются на разные кусочки. Кусочки, соответствующие разным месяцам, никогда не объединяются. Это сделано, чтобы обеспечить локальность модификаций данных (для упрощения бэкапов). +Для каждого блока данных ClickHouse создаёт индексный файл, который содержит значение первичного ключа для каждой индексной строки («засечка»). Номера индексных строк определяются как `n * index_granularity`, а максимальное значение `n` равно целой части от деления общего количества строк на `index_granularity`. Для каждого столбца также пишутся «засечки» для тех же индексных строк, что и для первичного ключа, эти «засечки» позволяют находить непосредственно данные в столбцах. 
-Кусочки объединяются до некоторого предельного размера, чтобы не было слишком длительных слияний. +Вы можете использовать одну большую таблицу, постоянно добавляя в неё данные небольшими пачками, именно для этого предназначен движок `MergeTree`. -Для каждого кусочка также пишется индексный файл. Индексный файл содержит значение первичного ключа для каждой index_granularity строки таблицы. То есть, это разреженный индекс сортированных данных. +## Первичные ключи и индексы в запросах -Для столбцов также пишутся «засечки» каждую index_granularity строку, чтобы данные можно было читать в определённом диапазоне. +Рассмотрим первичный ключ — `(CounterID, Date)`, в этом случае, сортировку и индекс можно проиллюстрировать следующим образом: -При чтении из таблицы, запрос SELECT анализируется на предмет того, можно ли использовать индексы. -Индекс может использоваться, если в секции WHERE/PREWHERE, в качестве одного из элементов конъюнкции, или целиком, есть выражение, представляющее операции сравнения на равенства, неравенства, а также IN или LIKE с фиксированным префиксом, над столбцами или выражениями, входящими в первичный ключ или ключ партиционирования, либо над некоторыми частистично монотонными функциями от этих столбцов, а также логические связки над такими выражениями. +``` +Whole data: [-------------------------------------------------------------------------] +CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] +Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] +Marks: | | | | | | | | | | | + a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 +Marks numbers: 0 1 2 3 4 5 6 7 8 9 10 +``` + +Если в запросе к данным указать: + +- `CounterID IN ('a', 'h')`, то сервер читает данные в диапазонах засечек `[0, 3)` и `[6, 8)`. +- `CounterID IN ('a', 'h') AND Date = 3`, то сервер читает данные в диапазонах засечек `[1, 3)` и `[7, 8)`. +- `Date = 3`, то сервер читает данные в диапазоне засечек `[1, 10)`. + +Примеры выше показывают, что использование индекса всегда эффективнее, чем full scan. + +Разреженный индекс допускает чтение лишних строк. При чтении одного диапазона первичного ключа, может быть прочитано до `index_granularity * 2` лишних строк в каждом блоке данных. В большинстве случаев ClickHouse не теряет производительности при `index_granularity = 8192`. + +Разреженность индекса позволяет работать даже с очень большим количеством строк в таблицах, поскольку такой индекс всегда помещается в оперативную память компьютера. + +ClickHouse не требует уникального первичного ключа. Можно вставить много строк с одинаковым первичным ключом. + +### Выбор первичного ключа + +Количество столбцов в первичном ключе не ограничено явным образом. В зависимости от структуры данных в первичный ключ можно включать больше или меньше столбцов. Это может: + +- Увеличить эффективность индекса. + + Пусть первичный ключ — `(a, b)`, тогда добавление ещё одного столбца `c` повысит эффективность, если выполнены условия: + + - Есть запросы с условием на столбец `c`. + - Часто встречаются достаточно длинные (в несколько раз больше `index_granularity`) диапазоны данных с одинаковыми значениями `(a, b)`. Иначе говоря, когда добавление ещё одного столбца позволит пропускать достаточно длинные диапазоны данных. + +- Улучшить сжатие данных. + + ClickHouse сортирует данные по первичному ключу, поэтому чем выше однородность, тем лучше сжатие. 
+ +- Обеспечить дополнительную логику при слиянии в движках [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) и [SummingMergeTree](summingmergetree.md#table_engine-summingmergetree). + + Может потребоваться иметь много полей в первичном ключе, даже если они не нужны для выполнения предыдущих пунктов. + +Длинный первичный ключ будет негативно влиять на производительность вставки и потребление памяти, однако на производительность ClickHouse при запросах `SELECT` лишние столбцы в первичном ключе не влияют. + +### Использование индексов и партиций в запросах + +Для запросов `SELECT` ClickHouse анализирует возможность использования индекса. Индекс может использоваться, если в секции `WHERE/PREWHERE`, в качестве одного из элементов конъюнкции, или целиком, есть выражение, представляющее операции сравнения на равенства, неравенства, а также `IN` или `LIKE` с фиксированным префиксом, над столбцами или выражениями, входящими в первичный ключ или ключ партиционирования, либо над некоторыми частично монотонными функциями от этих столбцов, а также логические связки над такими выражениями. Таким образом, обеспечивается возможность быстро выполнять запросы по одному или многим диапазонам первичного ключа. Например, в указанном примере будут быстро работать запросы для конкретного счётчика; для конкретного счётчика и диапазона дат; для конкретного счётчика и даты, для нескольких счётчиков и диапазона дат и т. п. +Рассмотрим движок сконфигурированный следующим образом: + +``` +ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +``` + +В этом случае в запросах: + ```sql SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34 SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42) SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01')) ``` -Во всех этих случаях будет использоваться индекс по дате и по первичному ключу. Видно, что индекс используется даже для достаточно сложных выражений. Чтение из таблицы организовано так, что использование индекса не может быть медленнее full scan-а. +ClickHouse будет использовать индекс по первичному ключу для отсечения не подходящих данных, а также ключ партиционирования по месяцам для отсечения партиций, которые находятся в не подходящих диапазонах дат. -В этом примере индекс не может использоваться. +Запросы выше показывают, что индекс используется даже для сложных выражений. Чтение из таблицы организовано так, что использование индекса не может быть медленнее, чем full scan. + +В примере ниже индекс не может использоваться. ```sql SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' @@ -58,14 +178,11 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' Чтобы проверить, сможет ли ClickHouse использовать индекс при выполнении запроса, используйте настройки [force_index_by_date](../settings/settings.md#settings-settings-force_index_by_date) и [force_primary_key](../settings/settings.md#settings-settings-force_primary_key). -Индекс по дате обеспечивает чтение только кусков, содержащих даты из нужного диапазона. 
При этом кусок данных может содержать данные за многие даты (до целого месяца), а в пределах одного куска данные лежат упорядоченными по первичному ключу, который может не содержать дату в качестве первого столбца. В связи с этим, при использовании запроса с указанием условия только на дату, но не на префикс первичного ключа, будет читаться данных больше, чем за одну дату.
+Ключ партиционирования по месяцам обеспечивает чтение только тех блоков данных, которые содержат даты из нужного диапазона. При этом блок данных может содержать данные за многие даты (до целого месяца). В пределах одного блока данные упорядочены по первичному ключу, который может не содержать дату в качестве первого столбца. В связи с этим, при использовании запроса с указанием условия только на дату, но не на префикс первичного ключа, будет читаться данных больше, чем за одну дату.

-Для конкуррентного доступа к таблице используется мультиверсионность. То есть, при одновременном чтении и обновлении таблицы, данные будут читаться из набора кусочков, актуального на момент запроса. Длинных блокировок нет. Вставки никак не мешают чтениям.
+
+## Конкурентный доступ к данным
+
+Для конкурентного доступа к таблице используется мультиверсионность. То есть, при одновременном чтении и обновлении таблицы, данные будут читаться из набора кусочков, актуального на момент запроса. Длинных блокировок нет. Вставки никак не мешают чтениям.

 Чтения из таблицы автоматически распараллеливаются.
-
-Поддерживается запрос `OPTIMIZE`, который вызывает один внеочередной шаг слияния.
-
-Вы можете использовать одну большую таблицу, постоянно добавляя в неё данные небольшими пачками, именно для этого предназначен движок MergeTree.
-
-Для всех типов таблиц семейства MergeTree возможна репликация данных — смотрите раздел «Репликация данных».
diff --git a/docs/ru/operations/table_engines/replication.md b/docs/ru/operations/table_engines/replication.md
index b5459b77b55..1dec0f4f42c 100644
--- a/docs/ru/operations/table_engines/replication.md
+++ b/docs/ru/operations/table_engines/replication.md
@@ -152,6 +152,8 @@ sudo -u clickhouse touch /var/lib/clickhouse/flags/force_restore_data

 Отсутствует ограничение на использование сетевой полосы при восстановлении. Имейте это ввиду, если восстанавливаете сразу много реплик.

+<a name="convert-mergetree-to-replicated"></a>
+
 ## Преобразование из MergeTree в ReplicatedMergeTree

 Здесь и далее, под `MergeTree` подразумеваются все движки таблиц семейства `MergeTree`, так же для `ReplicatedMergeTree`.
diff --git a/docs/ru/operations/table_engines/summingmergetree.md b/docs/ru/operations/table_engines/summingmergetree.md
index 189aac06504..6aa2f116d72 100644
--- a/docs/ru/operations/table_engines/summingmergetree.md
+++ b/docs/ru/operations/table_engines/summingmergetree.md
@@ -1,3 +1,5 @@
+<a name="table_engine-summingmergetree"></a>
+
 # SummingMergeTree

 Отличается от `MergeTree` тем, что суммирует данные при слиянии.
diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md
index 232386c485f..d3e8b8a985b 100644
--- a/docs/ru/query_language/select.md
+++ b/docs/ru/query_language/select.md
@@ -46,6 +46,8 @@ SELECT [DISTINCT] expr_list
 Модификатор FINAL может быть использован только при SELECT-е из таблицы типа CollapsingMergeTree. При указании FINAL, данные будут выбираться полностью "сколлапсированными". Стоит учитывать, что использование FINAL приводит к выбору кроме указанных в SELECT-е столбцов также столбцов, относящихся к первичному ключу. Также, запрос будет выполняться в один поток, и при выполнении запроса будет выполняться слияние данных. Это приводит к тому, что при использовании FINAL, запрос выполняется медленнее. В большинстве случаев, следует избегать использования FINAL. Подробнее смотрите раздел "Движок CollapsingMergeTree".

+<a name="select-section-sample"></a>
+
 ### Секция SAMPLE

 Секция SAMPLE позволяет выполнить запрос приближённо. Приближённое выполнение запроса поддерживается только таблицами типа MergeTree\* и только если при создании таблицы было указано выражение, по которому производится выборка (смотрите раздел "Движок MergeTree").
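As a side note on the sparse-index mechanics described in the MergeTree documentation patch above: a hedged, much-simplified C++ sketch of how a sorted table of marks (one entry per `index_granularity` rows) can prune the row ranges to read. All names here (`Mark`, `rowRangeForKey`) are invented for illustration and are not ClickHouse's actual implementation:

```cpp
#include <algorithm>
#include <cstdio>
#include <iterator>
#include <utility>
#include <vector>

// Simplified stand-in for a sparse primary index: one (key, row) mark
// every index_granularity rows of a table sorted by key.
struct Mark { int key; size_t row; };

// Return a half-open row range that may contain `key`; rows outside it
// can be skipped without being read.
std::pair<size_t, size_t> rowRangeForKey(const std::vector<Mark> & marks, int key, size_t total_rows)
{
    auto begin = std::lower_bound(marks.begin(), marks.end(), key,
        [](const Mark & m, int k) { return m.key < k; });
    // Step back one mark: the previous granule may still hold the key.
    size_t first = (begin == marks.begin()) ? 0 : std::prev(begin)->row;
    auto end = std::upper_bound(marks.begin(), marks.end(), key,
        [](int k, const Mark & m) { return k < m.key; });
    // A granule whose first key is already greater than `key` is excluded.
    size_t last = (end == marks.end()) ? total_rows : end->row;
    return {first, last};
}

int main()
{
    std::vector<Mark> marks = {{1, 0}, {3, 8192}, {3, 16384}, {7, 24576}};
    auto range = rowRangeForKey(marks, 3, 32768);
    printf("read rows [%zu, %zu)\n", range.first, range.second);
    return 0;
}
```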
From cdb7116f371e32708c41da49b15427e26d8e7bd1 Mon Sep 17 00:00:00 2001
From: Vladimir Kozbin
Date: Tue, 21 Aug 2018 17:04:27 +0700
Subject: [PATCH 040/192] ISSUES-2629 clickhouse-client: fix reading user from
 config

---
 dbms/programs/client/Client.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index 664f5d79df5..5227edcafe6 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -1685,7 +1685,7 @@ public:
             config().setInt("port", options["port"].as<int>());
         if (options.count("secure"))
             config().setBool("secure", true);
-        if (options.count("user"))
+        if (options.count("user") && !options["user"].defaulted())
             config().setString("user", options["user"].as<std::string>());
         if (options.count("password"))
             config().setString("password", options["password"].as<std::string>());

From 3404d72da1a59ae25bb5cbedb21c5a99fa65e3cb Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 21 Aug 2018 12:56:01 +0300
Subject: [PATCH 041/192] Fix pointInPolygon case with nonconvex polygon with
 two nontrivial halfplanes in cell. #2866

---
 dbms/src/Functions/GeoUtils.h | 41 ++++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Functions/GeoUtils.h b/dbms/src/Functions/GeoUtils.h
index d1267b18769..800f8112aaf 100644
--- a/dbms/src/Functions/GeoUtils.h
+++ b/dbms/src/Functions/GeoUtils.h
@@ -108,7 +108,8 @@ private:
         inner,
         outer,
         singleLine,
-        pairOfLinesSinglePolygon,
+        pairOfLinesSingleConvexPolygon,
+        pairOfLinesSingleNonConvexPolygons,
         pairOfLinesDifferentPolygons,
         complexPolygon
     };
@@ -180,6 +181,9 @@ private:
     /// Returns a list of half-planes were formed from intersection edges without box edges.
     inline std::vector<HalfPlane> findHalfPlanes(const Box & box, const Polygon & intersection);

+    /// Check that polygon.outer() is convex.
+    inline bool isConvex(const Polygon & polygon);
+
     using Distance = typename boost::geometry::default_comparable_distance_result<Point, Point>::type;

     /// min(distance(point, edge) : edge in polygon)
@@ -306,9 +310,10 @@ bool PointInPolygonWithGrid<CoordinateType>::contains(CoordinateType x, CoordinateType y)
             return false;
         case CellType::singleLine:
             return cell.half_planes[0].contains(x, y);
-        case CellType::pairOfLinesSinglePolygon:
+        case CellType::pairOfLinesSingleConvexPolygon:
             return cell.half_planes[0].contains(x, y) && cell.half_planes[1].contains(x, y);
         case CellType::pairOfLinesDifferentPolygons:
+        case CellType::pairOfLinesSingleNonConvexPolygons:
             return cell.half_planes[0].contains(x, y) || cell.half_planes[1].contains(x, y);
         case CellType::complexPolygon:
             return boost::geometry::within(Point(x, y), polygons[cell.index_of_inner_polygon]);
@@ -335,6 +340,35 @@ PointInPolygonWithGrid<CoordinateType>::distance(
     return distance;
 }

+template <typename CoordinateType>
+bool PointInPolygonWithGrid<CoordinateType>::isConvex(const PointInPolygonWithGrid<CoordinateType>::Polygon & polygon)
+{
+    const auto & outer = polygon.outer();
+    /// Segment or point.
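+    /// A boost::geometry ring is closed (its first point is repeated at the
+    /// end), so an outer ring with fewer than 4 points is degenerate.
+    /// The result drives contains() above: a two-half-plane cell is tested
+    /// with AND of the half-planes for a convex piece, with OR otherwise.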
+    if (outer.size() < 4)
+        return false;
+
+    auto vecProduct = [](const Point & from, const Point & to) { return from.x() * to.y() - from.y() * to.x(); };
+    auto getVector = [](const Point & from, const Point & to) -> Point
+    {
+        return Point(to.x() - from.x(), to.y() - from.y());
+    };
+
+    Point first = getVector(outer[0], outer[1]);
+    Point prev = first;
+
+    for (auto i : ext::range(1, outer.size() - 1))
+    {
+        Point cur = getVector(outer[i], outer[i + 1]);
+        if (vecProduct(prev, cur) < 0)
+            return false;
+
+        prev = cur;
+    }
+
+    return vecProduct(prev, first) >= 0;
+}
+
 template <typename CoordinateType>
 std::vector<typename PointInPolygonWithGrid<CoordinateType>::HalfPlane>
 PointInPolygonWithGrid<CoordinateType>::findHalfPlanes(
@@ -423,7 +457,8 @@ void PointInPolygonWithGrid<CoordinateType>::addCell(
     }
     else if (half_planes.size() == 2)
     {
-        cells[index].type = CellType::pairOfLinesSinglePolygon;
+        cells[index].type = isConvex(intersection) ? CellType::pairOfLinesSingleConvexPolygon
+                                                   : CellType::pairOfLinesSingleNonConvexPolygons;
         cells[index].half_planes[0] = half_planes[0];
         cells[index].half_planes[1] = half_planes[1];
     }

From c27bade22b8774bc03de641cfa973d1ea99203e5 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Tue, 21 Aug 2018 14:31:22 +0300
Subject: [PATCH 042/192] Added test for pointInPolygon #2866

---
 .../0_stateless/00500_point_in_polygon_bug.reference | 1 +
 .../queries/0_stateless/00500_point_in_polygon_bug.sql | 6 ++++++
 2 files changed, 7 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.reference
 create mode 100644 dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.sql

diff --git a/dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.reference b/dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.reference
new file mode 100644
index 00000000000..8caddbc4207
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.reference
@@ -0,0 +1 @@
+1314 1314
diff --git a/dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.sql b/dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.sql
new file mode 100644
index 00000000000..d85b919fa21
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00500_point_in_polygon_bug.sql
@@ -0,0 +1,6 @@
+drop table if exists test.coords;
+create table test.coords (x Float32, y Float32) engine = Memory;
+insert into test.coords values (-81.7897198,29.4292161), (-81.7886889,29.4292222), (-81.787658,29.4292283), (-81.7866341,29.4301369), (-81.7866271,29.4292344), (-81.7856171,29.4319479), (-81.7856101,29.4310454), (-81.7856032,29.430143), (-81.7855962,29.4292405), (-81.7845931,29.4328564), (-81.7845862,29.4319539), (-81.7845792,29.4310515), (-81.7845723,29.430149), (-81.7845654,29.4292466), (-81.7835622,29.4328625), (-81.7835553,29.43196), (-81.7835483,29.4310576), (-81.7835414,29.4301551), (-81.7835345,29.4292527), (-81.7825382,29.433771), (-81.7825313,29.4328685), (-81.7825243,29.4319661), (-81.7825174,29.4310636), (-81.7825105,29.4301612), (-81.7825036,29.4292587), (-81.7815142,29.4346795), (-81.7815073,29.433777), (-81.7815003,29.4328746), (-81.7814934,29.4319721), (-81.7814865,29.4310697), (-81.7814796,29.4301672), (-81.7814727,29.4292648), (-81.7804832,29.4346855), (-81.7804763,29.4337831), (-81.7804694,29.4328806), (-81.7804625,29.4319782), (-81.7804556,29.4310757), (-81.7804487,29.4301733), (-81.7804418,29.4292708), (-81.7794592,29.435594), (-81.7794523,29.4346916), (-81.7794454,29.4337891), (-81.7794385,29.4328867), (-81.7794316,29.4319842), (-81.7794247,29.4310818), (-81.7794178,29.4301793), (-81.7794109,29.4292768), (-81.7784282,29.4356001),
(-81.7784213,29.4346976), (-81.7784145,29.4337952), (-81.7784076,29.4328927), (-81.7784007,29.4319902), (-81.7783938,29.4310878), (-81.7783869,29.4301853), (-81.77838,29.4292829), (-81.7773973,29.4356061), (-81.7773904,29.4347036), (-81.7773835,29.4338012), (-81.7773766,29.4328987), (-81.7773698,29.4319963), (-81.7773629,29.4310938), (-81.777356,29.4301914), (-81.7773492,29.4292889), (-81.7763732,29.4365146), (-81.7763663,29.4356121), (-81.7763594,29.4347096), (-81.7763526,29.4338072), (-81.7763457,29.4329047), (-81.7763389,29.4320023), (-81.776332,29.4310998), (-81.7763251,29.4301974), (-81.7763183,29.4292949), (-81.7753422,29.4365206), (-81.7753354,29.4356181), (-81.7753285,29.4347156), (-81.7753217,29.4338132), (-81.7753148,29.4329107), (-81.7753079,29.4320083), (-81.7753011,29.4311058), (-81.7752942,29.4302034), (-81.7752874,29.4293009), (-81.7742496,29.4284045), (-81.7742428,29.427502), (-81.774236,29.426599500000002), (-81.7742291,29.4256971), (-81.7742223,29.4247946), (-81.7742154,29.4238922), (-81.7742086,29.4229897), (-81.7742017,29.4220873), (-81.7741949,29.4211848), (-81.774188,29.4202823), (-81.7741812,29.4193799), (-81.7741744,29.4184774), (-81.7741675,29.417575), (-81.7741607,29.4166725), (-81.7741538,29.4157701), (-81.774147,29.4148676), (-81.7741402,29.4139651), (-81.7741333,29.4130627), (-81.7741265,29.4121602), (-81.7741196,29.4112578), (-81.7741128,29.4103553), (-81.774106,29.4094528), (-81.7740991,29.4085504), (-81.7740923,29.4076479), (-81.7740855,29.4067455), (-81.7740786,29.405843), (-81.7740718,29.4049405), (-81.7740649,29.4040381), (-81.7740581,29.4031356), (-81.7740513,29.4022332), (-81.7740444,29.4013307), (-81.7740376,29.4004282), (-81.7740308,29.3995258), (-81.7740239,29.3986233), (-81.7740171,29.3977209), (-81.7740103,29.3968184), (-81.7740034,29.3959159), (-81.7739966,29.3950135), (-81.7739898,29.394111), (-81.7739829,29.3932086), (-81.7739761,29.3923061), (-81.7739693,29.3914036), (-81.7739624,29.3905012), (-81.7739556,29.3895987), (-81.7739488,29.3886963), (-81.7739419,29.3877938), (-81.7739351,29.3868913), (-81.7739283,29.3859889), (-81.7732188,29.4284104), (-81.7732119,29.427508), (-81.7732051,29.4266055), (-81.7731983,29.4257031), (-81.7731914,29.4248006), (-81.7731846,29.4238982), (-81.7731778,29.4229957), (-81.7731709,29.4220932), (-81.7731641,29.4211908), (-81.7731573,29.4202883), (-81.7731504,29.4193859), (-81.7731436,29.4184834), (-81.7731368,29.417581), (-81.7731299,29.4166785), (-81.7731231,29.415776), (-81.7731163,29.4148736), (-81.7731094,29.4139711), (-81.7731026,29.4130687), (-81.7730958,29.4121662), (-81.7730889,29.4112637), (-81.7730821,29.4103613), (-81.7730753,29.4094588), (-81.7730684,29.4085564), (-81.7730616,29.4076539), (-81.7730548,29.4067514), (-81.773048,29.405849), (-81.7730411,29.4049465), (-81.7730343,29.4040441), (-81.7730275,29.4031416), (-81.7730207,29.4022391), (-81.7730138,29.4013367), (-81.773007,29.4004342), (-81.7730002,29.3995318), (-81.7729933,29.3986293), (-81.7729865,29.3977268), (-81.7729797,29.3968244), (-81.7729729,29.3959219), (-81.772966,29.3950195), (-81.7729592,29.394117), (-81.7729524,29.3932145), (-81.7729456,29.3923121), (-81.7729388,29.3914096), (-81.7729319,29.3905072), (-81.7729251,29.3896047), (-81.7729183,29.3887022), (-81.7729115,29.3877998), (-81.7729046,29.3868973), (-81.7728978,29.3859948), (-81.772891,29.3850924), (-81.7728842,29.3841899), (-81.7728774,29.3832875), (-81.7728705,29.382385), (-81.7728637,29.3814825), (-81.7721879,29.4284164), (-81.7721811,29.427514), (-81.7721742,29.4266115), 
(-81.7721674,29.4257091), (-81.7721606,29.4248066), (-81.7721537,29.4239041), (-81.7721469,29.4230017), (-81.7721401,29.4220992), (-81.7721333,29.4211968), (-81.7721265,29.4202943), (-81.7721196,29.4193918), (-81.7721128,29.4184894), (-81.772106,29.4175869), (-81.7720992,29.4166845), (-81.7720923,29.415782), (-81.7720855,29.4148796), (-81.7720787,29.4139771), (-81.7720719,29.4130746), (-81.772065,29.4121722), (-81.7720582,29.4112697), (-81.7720514,29.4103673), (-81.7720446,29.4094648), (-81.7720378,29.4085623), (-81.7720309,29.4076599), (-81.7720241,29.4067574), (-81.7720173,29.405855), (-81.7720105,29.4049525), (-81.7720037,29.40405), (-81.7719969,29.4031476), (-81.77199,29.402245100000002), (-81.7719832,29.4013427), (-81.7719764,29.4004402), (-81.7719696,29.3995377), (-81.7719628,29.3986353), (-81.771956,29.3977328), (-81.7719491,29.3968304), (-81.7719423,29.3959279), (-81.7719355,29.3950254), (-81.7719287,29.394123), (-81.7719219,29.393220499999998), (-81.7719151,29.3923181), (-81.7719082,29.3914156), (-81.7719014,29.3905131), (-81.7718946,29.3896107), (-81.7718878,29.3887082), (-81.771881,29.3878057), (-81.7718742,29.3869033), (-81.7718674,29.3860008), (-81.7718606,29.3850984), (-81.7718537,29.3841959), (-81.7718469,29.3832934), (-81.7718401,29.382391), (-81.7718333,29.3814885), (-81.771157,29.4284224), (-81.7711502,29.4275199), (-81.7711434,29.4266175), (-81.7711365,29.425715), (-81.7711297,29.4248126), (-81.7711229,29.4239101), (-81.7711161,29.4230077), (-81.7711093,29.4221052), (-81.7711025,29.4212027), (-81.7710957,29.4203003), (-81.7710888,29.4193978), (-81.771082,29.4184954), (-81.7710752,29.4175929), (-81.7710684,29.4166904), (-81.7710616,29.415788), (-81.7710548,29.4148855), (-81.771048,29.4139831), (-81.7710411,29.4130806), (-81.7710343,29.4121781), (-81.7710275,29.4112757), (-81.7710207,29.4103732), (-81.7710139,29.4094708), (-81.7710071,29.4085683), (-81.7710003,29.4076659), (-81.7709935,29.4067634), (-81.7709867,29.4058609), (-81.7709798,29.4049585), (-81.770973,29.404056), (-81.7709662,29.4031536), (-81.7709594,29.4022511), (-81.7709526,29.4013486), (-81.7709458,29.4004462), (-81.770939,29.3995437), (-81.7709322,29.3986412), (-81.7709254,29.3977388), (-81.7709186,29.3968363), (-81.7709118,29.3959339), (-81.770905,29.3950314), (-81.7708982,29.3941289), (-81.7708913,29.3932265), (-81.7708845,29.392324), (-81.7708777,29.3914216), (-81.7708709,29.3905191), (-81.7708641,29.3896166), (-81.7708573,29.3887142), (-81.7708505,29.3878117), (-81.7708437,29.3869092), (-81.7708369,29.3860068), (-81.7708301,29.3851043), (-81.7708233,29.3842019), (-81.7708165,29.3832994), (-81.7708097,29.3823969), (-81.7708029,29.3814945), (-81.7701329,29.4293308), (-81.7701261,29.4284284), (-81.7701193,29.4275259), (-81.7701125,29.4266234), (-81.7701057,29.425721), (-81.7700989,29.4248185), (-81.7700921,29.4239161), (-81.7700853,29.4230136), (-81.7700785,29.4221112), (-81.7700717,29.4212087), (-81.7700649,29.4203062), (-81.770058,29.4194038), (-81.7700512,29.4185013), (-81.7700444,29.4175989), (-81.7700376,29.4166964), (-81.7700308,29.4157939), (-81.770024,29.4148915), (-81.7700172,29.413989), (-81.7700104,29.4130866), (-81.7700036,29.4121841), (-81.7699968,29.4112816), (-81.76999,29.4103792), (-81.7699832,29.4094767), (-81.7699764,29.4085743), (-81.7699696,29.4076718), (-81.7699628,29.4067694), (-81.769956,29.4058669), (-81.7699492,29.4049644), (-81.7699424,29.404062), (-81.7699356,29.4031595), (-81.7699288,29.402257), (-81.769922,29.4013546), (-81.7699152,29.4004521), (-81.7699084,29.3995497), 
(-81.7699016,29.3986472), (-81.7698948,29.3977447), (-81.769888,29.3968423), (-81.7698812,29.3959398), (-81.7698744,29.3950374), (-81.7698676,29.3941349), (-81.7698608,29.3932324), (-81.769854,29.39233), (-81.7698472,29.3914275), (-81.7698404,29.390525), (-81.7698336,29.3896226), (-81.7698268,29.3887201), (-81.76982,29.3878177), (-81.7698133,29.3869152), (-81.7698065,29.3860127), (-81.7697997,29.3851103), (-81.7697929,29.3842078), (-81.7697861,29.3833053), (-81.7697793,29.3824029), (-81.7697725,29.3815004), (-81.769102,29.4293368), (-81.7690952,29.4284343), (-81.7690884,29.4275319), (-81.7690816,29.4266294), (-81.7690748,29.4257269), (-81.769068,29.4248245), (-81.7690612,29.423922), (-81.7690544,29.4230196), (-81.7690476,29.4221171), (-81.7690409,29.4212147), (-81.7690341,29.4203122), (-81.7690273,29.4194097), (-81.7690205,29.4185073), (-81.7690137,29.4176048), (-81.7690069,29.4167024), (-81.7690001,29.4157999), (-81.7689933,29.4148974), (-81.7689865,29.413995), (-81.7689797,29.4130925), (-81.7689729,29.4121901), (-81.7689661,29.4112876), (-81.7689593,29.4103851), (-81.7689525,29.4094827), (-81.7689457,29.4085802), (-81.7689389,29.4076778), (-81.7689321,29.4067753), (-81.7689253,29.4058728), (-81.7689186,29.4049704), (-81.7689118,29.4040679), (-81.768905,29.4031655), (-81.7688982,29.402263), (-81.7688914,29.4013605), (-81.7688846,29.4004581), (-81.7688778,29.3995556), (-81.768871,29.3986532), (-81.7688642,29.3977507), (-81.7688574,29.3968482), (-81.7688507,29.3959458), (-81.7688439,29.3950433), (-81.7688371,29.3941408), (-81.7688303,29.3932384), (-81.7688235,29.3923359), (-81.7688167,29.3914335), (-81.7688099,29.390531), (-81.7688031,29.3896285), (-81.7687964,29.3887261), (-81.7687896,29.3878236), (-81.7687828,29.3869211), (-81.768776,29.3860187), (-81.7687692,29.3851162), (-81.7687624,29.3842138), (-81.7687557,29.3833113), (-81.7687489,29.3824088), (-81.7687421,29.3815064), (-81.7680711,29.4293427), (-81.7680644,29.4284403), (-81.7680576,29.4275378), (-81.7680508,29.4266353), (-81.768044,29.4257329), (-81.7680372,29.4248304), (-81.7680304,29.423928), (-81.7680236,29.4230255), (-81.7680168,29.4221231), (-81.76801,29.4212206), (-81.7680033,29.4203181), (-81.7679965,29.4194157), (-81.7679897,29.4185132), (-81.7679829,29.4176108), (-81.7679761,29.4167083), (-81.7679693,29.4158058), (-81.7679625,29.4149034), (-81.7679557,29.4140009), (-81.767949,29.4130985), (-81.7679422,29.412196), (-81.7679354,29.4112935), (-81.7679286,29.4103911), (-81.7679218,29.4094886), (-81.767915,29.4085862), (-81.7679083,29.4076837), (-81.7679015,29.4067812), (-81.7678947,29.4058788), (-81.7678879,29.4049763), (-81.7678811,29.4040739), (-81.7678743,29.4031714), (-81.7678676,29.4022689), (-81.7678608,29.4013665), (-81.767854,29.400464), (-81.7678472,29.3995616), (-81.7678404,29.3986591), (-81.7678337,29.3977566), (-81.7678269,29.3968542), (-81.7678201,29.3959517), (-81.7678133,29.3950492), (-81.7678065,29.3941468), (-81.7677998,29.3932443), (-81.767793,29.3923419), (-81.7677862,29.3914394), (-81.7677794,29.3905369), (-81.7677727,29.3896345), (-81.7677659,29.388732), (-81.7677591,29.3878295), (-81.7677523,29.3869271), (-81.7677456,29.3860246), (-81.7677388,29.3851222), (-81.767732,29.3842197), (-81.7677252,29.3833172), (-81.7677185,29.3824148), (-81.7677117,29.3815123), (-81.7670403,29.4293487), (-81.7670335,29.4284462), (-81.7670267,29.4275437), (-81.7670199,29.4266413), (-81.7670131,29.4257388), (-81.7670063,29.4248364), (-81.7669996,29.4239339), (-81.7669928,29.4230315), (-81.766986,29.422128999999998), 
(-81.7669792,29.4212265), (-81.7669725,29.4203241), (-81.7669657,29.4194216), (-81.7669589,29.4185192), (-81.7669521,29.4176167), (-81.7669453,29.4167142), (-81.7669386,29.4158118), (-81.7669318,29.4149093), (-81.766925,29.4140069), (-81.7669182,29.4131044), (-81.7669115,29.4122019), (-81.7669047,29.4112995), (-81.7668979,29.410397), (-81.7668911,29.4094946), (-81.7668844,29.4085921), (-81.7668776,29.4076896), (-81.7668708,29.4067872), (-81.766864,29.4058847), (-81.7668573,29.4049823), (-81.7668505,29.4040798), (-81.7668437,29.4031773), (-81.7668369,29.4022749), (-81.7668302,29.4013724), (-81.7668234,29.40047), (-81.7668166,29.3995675), (-81.7668099,29.398665), (-81.7668031,29.3977626), (-81.7667963,29.3968601), (-81.7667895,29.3959576), (-81.7667828,29.3950552), (-81.766776,29.3941527), (-81.7667692,29.3932503), (-81.7667625,29.3923478), (-81.7667557,29.3914453), (-81.7667489,29.3905429), (-81.7667422,29.3896404), (-81.7667354,29.3887379), (-81.7667286,29.3878355), (-81.7667219,29.386933), (-81.7667151,29.3860305), (-81.7667083,29.3851281), (-81.7667016,29.3842256), (-81.7666948,29.3833232), (-81.766688,29.3824207), (-81.7666813,29.3815182), (-81.7660094,29.4293546), (-81.7660026,29.4284521), (-81.7659958,29.4275497), (-81.765989,29.4266472), (-81.7659823,29.4257448), (-81.7659755,29.4248423), (-81.7659687,29.4239398), (-81.765962,29.4230374), (-81.7659552,29.4221349), (-81.7659484,29.4212325), (-81.7659417,29.42033), (-81.7659349,29.4194275), (-81.7659281,29.4185251), (-81.7659213,29.4176226), (-81.7659146,29.4167202), (-81.7659078,29.4158177), (-81.765901,29.4149153), (-81.7658943,29.4140128), (-81.7658875,29.4131103), (-81.7658807,29.4122079), (-81.765874,29.4113054), (-81.7658672,29.4104029), (-81.7658604,29.4095005), (-81.7658537,29.408598), (-81.7658469,29.4076956), (-81.7658401,29.4067931), (-81.7658334,29.4058906), (-81.7658266,29.4049882), (-81.7658199,29.4040857), (-81.7658131,29.4031833), (-81.7658063,29.4022808), (-81.7657996,29.4013783), (-81.7657928,29.4004759), (-81.765786,29.3995734), (-81.7657793,29.398671), (-81.7657725,29.3977685), (-81.7657658,29.396866), (-81.765759,29.3959636), (-81.7657522,29.3950611), (-81.7657455,29.3941586), (-81.7657387,29.3932562), (-81.765732,29.3923537), (-81.7657252,29.3914513), (-81.7657184,29.3905488), (-81.7657117,29.3896463), (-81.7657049,29.3887439), (-81.7656982,29.3878414), (-81.7656914,29.3869389), (-81.7656846,29.3860365), (-81.7656779,29.385134), (-81.7656711,29.3842315), (-81.7656644,29.3833291), (-81.7656576,29.3824266), (-81.7656509,29.3815241), (-81.7649785,29.4293605), (-81.7649717,29.4284581), (-81.7649649,29.4275556), (-81.7649582,29.4266531), (-81.7649514,29.4257507), (-81.7649447,29.4248482), (-81.7649379,29.4239458), (-81.7649311,29.4230433), (-81.7649244,29.4221408), (-81.7649176,29.4212384), (-81.7649109,29.4203359), (-81.7649041,29.4194335), (-81.7648973,29.418531), (-81.7648906,29.4176286), (-81.7648838,29.416726099999998), (-81.7648771,29.4158236), (-81.7648703,29.4149212), (-81.7648635,29.4140187), (-81.7648568,29.4131163), (-81.76485,29.4122138), (-81.7648433,29.4113113), (-81.7648365,29.4104089), (-81.7648298,29.4095064), (-81.764823,29.4086039), (-81.7648162,29.4077015), (-81.7648095,29.406799), (-81.7648027,29.4058966), (-81.764796,29.4049941), (-81.7647892,29.4040916), (-81.7647825,29.4031892), (-81.7647757,29.4022867), (-81.764769,29.4013843), (-81.7647622,29.4004818), (-81.7647554,29.3995793), (-81.7647487,29.3986769), (-81.7647419,29.3977744), (-81.7647352,29.3968719), (-81.7647284,29.3959695), 
(-81.7647217,29.395067), (-81.7647149,29.3941646), (-81.7647082,29.3932621), (-81.7647014,29.3923596), (-81.7646947,29.3914572), (-81.7646879,29.3905547), (-81.7646812,29.3896522), (-81.7646744,29.3887498), (-81.7646677,29.3878473), (-81.7646609,29.3869448), (-81.7646542,29.3860424), (-81.7646474,29.3851399), (-81.7646407,29.3842375), (-81.7646339,29.383335), (-81.7646272,29.3824325), (-81.7646204,29.3815301), (-81.7639476,29.4293664), (-81.7639408,29.428464), (-81.7639341,29.4275615), (-81.7639273,29.4266591), (-81.7639206,29.4257566), (-81.7639138,29.4248541), (-81.7639071,29.4239517), (-81.7639003,29.4230492), (-81.7638936,29.4221468), (-81.7638868,29.4212443), (-81.7638801,29.4203418), (-81.7638733,29.4194394), (-81.7638666,29.4185369), (-81.7638598,29.4176345), (-81.763853,29.416732), (-81.7638463,29.4158295), (-81.7638396,29.4149271), (-81.7638328,29.4140246), (-81.7638261,29.4131222), (-81.7638193,29.4122197), (-81.7638126,29.4113172), (-81.7638058,29.4104148), (-81.7637991,29.4095123), (-81.7637923,29.4086099), (-81.7637856,29.4077074), (-81.7637788,29.4068049), (-81.7637721,29.4059025), (-81.7637653,29.405), (-81.7637586,29.4040976), (-81.7637518,29.4031951), (-81.7637451,29.4022926), (-81.7637383,29.4013902), (-81.7637316,29.4004877), (-81.7637249,29.3995852), (-81.7637181,29.3986828), (-81.7637114,29.3977803), (-81.7637046,29.3968779), (-81.7636979,29.3959754), (-81.7636911,29.3950729), (-81.7636844,29.3941705), (-81.7636777,29.393268), (-81.7636709,29.3923655), (-81.7636642,29.3914631), (-81.7636574,29.3905606), (-81.7636507,29.3896581), (-81.763644,29.3887557), (-81.7636372,29.3878532), (-81.7636305,29.3869508), (-81.7636237,29.3860483), (-81.763617,29.3851458), (-81.7636103,29.3842434), (-81.7636035,29.3833409), (-81.7635968,29.3824384), (-81.76359,29.381536), (-81.7629167,29.4293723), (-81.7629099,29.4284699), (-81.7629032,29.4275674), (-81.7628965,29.426665), (-81.7628897,29.4257625), (-81.762883,29.42486), (-81.7628762,29.4239576), (-81.7628695,29.4230551), (-81.7628627,29.4221527), (-81.762856,29.4212502), (-81.7628493,29.4203478), (-81.7628425,29.4194453), (-81.7628358,29.4185428), (-81.762829,29.4176404), (-81.7628223,29.4167379), (-81.7628155,29.4158355), (-81.7628088,29.414933), (-81.7628021,29.4140305), (-81.7627953,29.4131281), (-81.7627886,29.4122256), (-81.7627818,29.4113231), (-81.7627751,29.4104207), (-81.7627684,29.4095182), (-81.7627616,29.4086158), (-81.7627549,29.4077133), (-81.7627482,29.4068108), (-81.7627414,29.4059084), (-81.7627347,29.4050059), (-81.7627279,29.4041035), (-81.7627212,29.403201), (-81.7627145,29.4022985), (-81.76270769999999,29.4013961), (-81.762701,29.4004936), (-81.7626943,29.3995911), (-81.7626875,29.3986887), (-81.7626808,29.3977862), (-81.7626741,29.3968838), (-81.7626673,29.3959813), (-81.7626606,29.3950788), (-81.7626539,29.3941764), (-81.7626471,29.3932739), (-81.7626404,29.3923714), (-81.7626337,29.391469), (-81.7626269,29.3905665), (-81.7626202,29.389664), (-81.7626135,29.3887616), (-81.7626067,29.3878591), (-81.7626,29.3869567), (-81.7625933,29.3860542), (-81.7625865,29.3851517), (-81.7625798,29.3842493), (-81.7625731,29.383346799999998), (-81.7625664,29.3824443), (-81.7625596,29.3815419), (-81.7618858,29.4293782), (-81.7618791,29.4284758), (-81.7618723,29.4275733), (-81.7618656,29.4266709), (-81.7618589,29.4257684), (-81.7618521,29.4248659), (-81.7618454,29.4239635), (-81.7618386,29.423061), (-81.7618319,29.4221586), (-81.7618252,29.4212561), (-81.7618184,29.4203537), (-81.7618117,29.4194512), (-81.761805,29.4185487), 
(-81.7617983,29.4176463), (-81.7617915,29.4167438), (-81.7617848,29.4158413), (-81.7617781,29.4149389), (-81.7617713,29.4140364), (-81.7617646,29.413134), (-81.7617579,29.4122315), (-81.7617511,29.411329), (-81.7617444,29.4104266), (-81.7617377,29.4095241), (-81.7617309,29.4086217), (-81.7617242,29.4077192), (-81.7617175,29.4068167), (-81.7617108,29.4059143), (-81.761704,29.4050118), (-81.7616973,29.404109300000002), (-81.7616906,29.4032069), (-81.7616839,29.4023044), (-81.7616771,29.401402), (-81.7616704,29.4004995), (-81.7616637,29.399597), (-81.7616569,29.3986946), (-81.7616502,29.3977921), (-81.7616435,29.3968896), (-81.7616368,29.3959872), (-81.76163,29.3950847), (-81.7616233,29.3941823), (-81.7616166,29.3932798), (-81.7616099,29.3923773), (-81.7616032,29.3914749), (-81.7615964,29.3905724), (-81.7615897,29.3896699), (-81.761583,29.3887675), (-81.7615763,29.387865), (-81.7615695,29.3869625), (-81.7615628,29.3860601), (-81.7615561,29.3851576), (-81.7615494,29.3842551), (-81.7615427,29.3833527), (-81.7615359,29.3824502), (-81.7615292,29.3815478), (-81.7608549,29.4293841), (-81.7608482,29.4284817), (-81.7608414,29.4275792), (-81.7608347,29.4266768), (-81.760828,29.4257743), (-81.7608213,29.4248718), (-81.7608145,29.4239694), (-81.7608078,29.4230669), (-81.7608011,29.4221645), (-81.7607944,29.421262), (-81.7607876,29.4203595), (-81.7607809,29.4194571), (-81.7607742,29.4185546), (-81.7607675,29.4176522), (-81.7607608,29.4167497), (-81.760754,29.4158472), (-81.7607473,29.4149448), (-81.7607406,29.4140423), (-81.7607339,29.4131399), (-81.7607271,29.4122374), (-81.7607204,29.4113349), (-81.7607137,29.4104325), (-81.760707,29.40953), (-81.7607003,29.4086275), (-81.7606935,29.4077251), (-81.7606868,29.4068226), (-81.7606801,29.4059202), (-81.7606734,29.4050177), (-81.7606667,29.4041152), (-81.76066,29.4032128), (-81.7606532,29.4023103), (-81.76064650000001,29.4014078), (-81.7606398,29.4005054), (-81.7606331,29.3996029), (-81.7606264,29.3987005), (-81.7606196,29.397798), (-81.7606129,29.3968955), (-81.7606062,29.3959931), (-81.7605995,29.3950906), (-81.7605928,29.3941881), (-81.7605861,29.3932857), (-81.7605794,29.3923832), (-81.7605726,29.3914807), (-81.7605659,29.3905783), (-81.7605592,29.3896758), (-81.7605525,29.3887734), (-81.7605458,29.3878709), (-81.7605391,29.3869684), (-81.7605324,29.386066), (-81.7605257,29.3851635), (-81.7605189,29.384261), (-81.7605122,29.3833586), (-81.7605055,29.3824561), (-81.7604988,29.3815536), (-81.759824,29.42939), (-81.7598173,29.4284876), (-81.7598106,29.4275851), (-81.7598039,29.4266826), (-81.7597971,29.4257802), (-81.7597904,29.4248777), (-81.7597837,29.4239753), (-81.759777,29.4230728), (-81.7597703,29.4221703), (-81.7597636,29.4212679), (-81.7597568,29.4203654), (-81.7597501,29.419463), (-81.7597434,29.4185605), (-81.7597367,29.417658), (-81.75973,29.4167556), (-81.7597233,29.4158531), (-81.7597166,29.4149507), (-81.7597098,29.4140482), (-81.7597031,29.4131457), (-81.7596964,29.4122433), (-81.7596897,29.4113408), (-81.759683,29.4104384), (-81.7596763,29.4095359), (-81.7596696,29.4086334), (-81.7596629,29.407731), (-81.7596562,29.4068285), (-81.7596494,29.405926), (-81.7596427,29.4050236), (-81.759636,29.4041211), (-81.7596293,29.4032187), (-81.7596226,29.4023162), (-81.7596159,29.4014137), (-81.7596092,29.4005113), (-81.7596025,29.3996088), (-81.7595958,29.3987063), (-81.7595891,29.3978039), (-81.7595824,29.3969014), (-81.7595757,29.3959989), (-81.759569,29.3950965), (-81.7595622,29.394194), (-81.7595555,29.3932916), (-81.7595488,29.3923891), 
(-81.7595421,29.3914866), (-81.7595354,29.3905842), (-81.7595287,29.3896817), (-81.759522,29.3887792), (-81.7595153,29.3878768), (-81.7595086,29.3869743), (-81.7595019,29.3860718), (-81.7594952,29.3851694), (-81.7594885,29.3842669), (-81.7594818,29.3833644), (-81.7594751,29.382462), (-81.7594684,29.3815595), (-81.7587931,29.4293959), (-81.7587864,29.4284934), (-81.7587797,29.427591), (-81.758773,29.4266885), (-81.7587663,29.4257861), (-81.7587596,29.4248836), (-81.7587529,29.4239811), (-81.7587462,29.4230787), (-81.7587395,29.4221762), (-81.7587327,29.4212738), (-81.758726,29.4203713), (-81.7587193,29.4194688), (-81.7587126,29.4185664), (-81.7587059,29.4176639), (-81.7586992,29.4167615), (-81.7586925,29.415859), (-81.7586858,29.4149565), (-81.7586791,29.4140541), (-81.7586724,29.4131516), (-81.7586657,29.4122491), (-81.758659,29.4113467), (-81.7586523,29.4104442), (-81.7586456,29.4095418), (-81.7586389,29.4086393), (-81.7586322,29.4077368), (-81.7586255,29.4068344), (-81.7586188,29.4059319), (-81.7586121,29.4050294), (-81.7586054,29.404127), (-81.7585987,29.4032245), (-81.758592,29.4023221), (-81.7585853,29.4014196), (-81.7585786,29.4005171), (-81.7585719,29.3996147), (-81.7585652,29.3987122), (-81.7585585,29.3978097), (-81.7585518,29.3969073), (-81.7585451,29.3960048), (-81.7585384,29.3951024), (-81.7585317,29.3941999), (-81.758525,29.3932974), (-81.7585183,29.392395), (-81.7585116,29.3914925), (-81.7585049,29.39059), (-81.7584982,29.3896876), (-81.7584915,29.3887851), (-81.7584848,29.3878826), (-81.7584781,29.3869802), (-81.7584715,29.3860777), (-81.7584648,29.3851752), (-81.7584581,29.3842728), (-81.7584514,29.3833703), (-81.7584447,29.3824678), (-81.758438,29.3815654), (-81.7577622,29.4294018), (-81.7577555,29.4284993), (-81.7577488,29.4275968), (-81.7577421,29.4266944), (-81.7577354,29.4257919), (-81.7577287,29.4248895), (-81.757722,29.423987), (-81.7577153,29.4230845), (-81.7577086,29.4221821), (-81.7577019,29.4212796), (-81.7576952,29.4203772), (-81.7576885,29.4194747), (-81.7576818,29.4185722), (-81.7576752,29.4176698), (-81.7576685,29.4167673), (-81.7576618,29.4158649), (-81.7576551,29.4149624), (-81.7576484,29.4140599), (-81.7576417,29.4131575), (-81.757635,29.412255), (-81.7576283,29.4113525), (-81.7576216,29.4104501), (-81.7576149,29.4095476), (-81.75760819999999,29.4086452), (-81.7576015,29.4077427), (-81.7575948,29.4068402), (-81.7575881,29.4059378), (-81.7575814,29.4050353), (-81.7575748,29.4041328), (-81.7575681,29.403230399999998), (-81.7575614,29.4023279), (-81.7575547,29.4014255), (-81.757548,29.400523), (-81.7575413,29.3996205), (-81.7575346,29.3987181), (-81.7575279,29.3978156), (-81.7575212,29.3969131), (-81.7575145,29.3960107), (-81.7575079,29.3951082), (-81.7575012,29.3942057), (-81.7574945,29.3933033), (-81.7574878,29.3924008), (-81.7574811,29.3914984), (-81.7574744,29.3905959), (-81.7574677,29.3896934), (-81.7574611,29.388791), (-81.7574544,29.3878885), (-81.7574477,29.386986), (-81.757441,29.3860836), (-81.7574343,29.3851811), (-81.7574276,29.384278600000002), (-81.7574209,29.3833762), (-81.7574143,29.3824737), (-81.7574076,29.3815712), (-81.7567313,29.4294076), (-81.7567246,29.4285052), (-81.7567179,29.4276027), (-81.7567113,29.4267002), (-81.7567046,29.4257978), (-81.7566979,29.4248953), (-81.7566912,29.423992900000002), (-81.7566845,29.4230904), (-81.7566778,29.4221879), (-81.7566711,29.4212855), (-81.7566644,29.420383), (-81.7566577,29.4194806), (-81.7566511,29.4185781), (-81.7566444,29.4176756), (-81.7566377,29.4167732), (-81.756631,29.4158707), 
(-81.7566243,29.4149683), (-81.7566176,29.4140658), (-81.7566109,29.4131633), (-81.7566043,29.4122609), (-81.7565976,29.4113584), (-81.7565909,29.4104559), (-81.7565842,29.4095535), (-81.7565775,29.408651), (-81.7565708,29.4077486), (-81.7565642,29.4068461), (-81.7565575,29.4059436), (-81.7565508,29.4050412), (-81.7565441,29.4041387), (-81.7565374,29.4032362), (-81.7565308,29.4023338), (-81.75652410000001,29.4014313), (-81.7565174,29.4005288), (-81.7565107,29.3996264), (-81.756504,29.3987239), (-81.7564973,29.3978215), (-81.7564907,29.396919), (-81.756484,29.3960165), (-81.7564773,29.3951141), (-81.7564706,29.3942116), (-81.756464,29.3933091), (-81.7564573,29.3924067), (-81.7564506,29.3915042), (-81.7564439,29.3906017), (-81.7564372,29.3896993), (-81.7564306,29.3887968), (-81.7564239,29.3878943), (-81.7564172,29.3869919), (-81.7564105,29.3860894), (-81.7564039,29.3851869), (-81.7563972,29.3842845), (-81.7563905,29.383382), (-81.7563838,29.3824795), (-81.7563772,29.3815771), (-81.7557004,29.4294135), (-81.7556937,29.428511), (-81.7556871,29.4276086), (-81.7556804,29.4267061), (-81.7556737,29.4258036), (-81.755667,29.4249012), (-81.7556603,29.4239987), (-81.7556537,29.4230963), (-81.755647,29.4221938), (-81.7556403,29.4212913), (-81.7556336,29.4203889), (-81.755627,29.4194864), (-81.7556203,29.418584), (-81.7556136,29.4176815), (-81.7556069,29.416779), (-81.7556002,29.4158766), (-81.7555936,29.4149741), (-81.7555869,29.4140716), (-81.7555802,29.4131692), (-81.7555735,29.4122667), (-81.7555669,29.4113643), (-81.7555602,29.4104618), (-81.7555535,29.4095593), (-81.7555468,29.4086569), (-81.7555402,29.4077544), (-81.7555335,29.4068519), (-81.7555268,29.4059495), (-81.7555201,29.405047), (-81.7555135,29.4041446), (-81.7555068,29.4032421), (-81.7555001,29.4023396), (-81.7554935,29.4014372), (-81.7554868,29.4005347), (-81.7554801,29.3996322), (-81.7554734,29.3987298), (-81.7554668,29.3978273), (-81.7554601,29.3969248), (-81.7554534,29.3960224), (-81.7554468,29.3951199), (-81.7554401,29.3942174), (-81.7554334,29.393315), (-81.7554268,29.3924125), (-81.7554201,29.39151), (-81.7554134,29.3906076), (-81.7554067,29.3897051), (-81.7554001,29.3888027), (-81.7553934,29.3879002), (-81.7553867,29.3869977), (-81.7553801,29.3860953), (-81.7553734,29.3851928), (-81.7553667,29.3842903), (-81.7553601,29.3833879), (-81.7553534,29.3824854), (-81.7553468,29.3815829), (-81.7546695,29.4294193), (-81.7546629,29.428516899999998), (-81.7546562,29.4276144), (-81.7546495,29.4267119), (-81.7546428,29.4258095), (-81.7546362,29.424907), (-81.7546295,29.4240046), (-81.7546228,29.4231021), (-81.7546162,29.4221996), (-81.7546095,29.4212972), (-81.7546028,29.4203947), (-81.7545962,29.4194923), (-81.7545895,29.4185898), (-81.7545828,29.4176873), (-81.7545762,29.4167849), (-81.7545695,29.4158824), (-81.7545628,29.4149799), (-81.7545562,29.4140775), (-81.7545495,29.413175), (-81.7545428,29.4122726), (-81.7545362,29.4113701), (-81.7545295,29.4104676), (-81.7545228,29.4095652), (-81.7545162,29.4086627), (-81.7545095,29.4077602), (-81.7545028,29.4068578), (-81.7544962,29.4059553), (-81.7544895,29.4050529), (-81.7544828,29.4041504), (-81.7544762,29.4032479), (-81.7544695,29.4023455), (-81.7544628,29.401443), (-81.7544562,29.4005405), (-81.7544495,29.3996381), (-81.7544429,29.3987356), (-81.7544362,29.3978331), (-81.7544295,29.3969307), (-81.7544229,29.3960282), (-81.7544162,29.3951257), (-81.7544096,29.3942233), (-81.7544029,29.3933208), (-81.7543962,29.3924183), (-81.7543896,29.3915159), (-81.7543829,29.3906134), 
(-81.7543763,29.389711), (-81.7543696,29.3888085), (-81.7543629,29.387906), (-81.7543563,29.3870036), (-81.7543496,29.3861011), (-81.754343,29.3851986), (-81.7543363,29.3842962), (-81.7543297,29.3833937), (-81.754323,29.3824912), (-81.7543163,29.3815888), (-81.7536386,29.4294252), (-81.753632,29.4285227), (-81.7536253,29.4276202), (-81.7536187,29.4267178), (-81.753612,29.4258153), (-81.7536053,29.4249129), (-81.7535987,29.4240104), (-81.753592,29.4231079), (-81.7535853,29.4222055), (-81.7535787,29.421303), (-81.753572,29.4204006), (-81.7535654,29.4194981), (-81.7535587,29.4185956), (-81.753552,29.4176932), (-81.7535454,29.4167907), (-81.75353870000001,29.4158882), (-81.7535321,29.4149858), (-81.7535254,29.4140833), (-81.7535188,29.4131809), (-81.7535121,29.4122784), (-81.7535054,29.4113759), (-81.7534988,29.4104735), (-81.7534921,29.409571), (-81.7534855,29.4086685), (-81.7534788,29.4077661), (-81.7534722,29.4068636), (-81.7534655,29.4059611), (-81.7534588,29.4050587), (-81.7534522,29.4041562), (-81.7534455,29.4032538), (-81.7534389,29.4023513), (-81.7534322,29.4014488), (-81.7534256,29.4005464), (-81.7534189,29.3996439), (-81.7534123,29.3987414), (-81.7534056,29.397839), (-81.753399,29.3969365), (-81.7533923,29.396034), (-81.7533857,29.3951316), (-81.753379,29.3942291), (-81.7533724,29.3933266), (-81.7533657,29.3924242), (-81.7533591,29.3915217), (-81.7533524,29.3906192), (-81.7533458,29.3897168), (-81.7533391,29.3888143), (-81.7533325,29.3879118), (-81.7533258,29.3870094), (-81.75331919999999,29.3861069), (-81.7533125,29.3852044), (-81.7533059,29.384302), (-81.7532992,29.3833995), (-81.7532926,29.382497), (-81.7532859,29.3815946), (-81.7526077,29.429431), (-81.7526011,29.4285285), (-81.7525944,29.4276261), (-81.7525878,29.4267236), (-81.7525811,29.4258211), (-81.7525745,29.4249187), (-81.7525678,29.4240162), (-81.7525612,29.4231138), (-81.7525545,29.4222113), (-81.7525479,29.4213088), (-81.7525412,29.4204064), (-81.7525346,29.4195039), (-81.7525279,29.4186015), (-81.7525213,29.417699), (-81.7525146,29.4167965), (-81.752508,29.4158941), (-81.7525013,29.4149916), (-81.7524947,29.414089099999998), (-81.752488,29.4131867), (-81.7524814,29.4122842), (-81.7524747,29.4113818), (-81.7524681,29.4104793), (-81.7524614,29.4095768), (-81.7524548,29.4086744), (-81.7524481,29.4077719), (-81.7524415,29.4068694), (-81.7524348,29.405967), (-81.7524282,29.4050645), (-81.7524216,29.404162), (-81.7524149,29.4032596), (-81.7524083,29.4023571), (-81.7524016,29.4014546), (-81.752395,29.4005522), (-81.7523883,29.3996497), (-81.7523817,29.3987473), (-81.752375,29.3978448), (-81.7523684,29.3969423), (-81.7523618,29.3960399), (-81.7523551,29.3951374), (-81.7523485,29.3942349), (-81.7523418,29.3933325), (-81.7523352,29.39243), (-81.7523285,29.3915275), (-81.7523219,29.3906251), (-81.7523153,29.3897226), (-81.7523086,29.3888201), (-81.752302,29.3879177), (-81.7522953,29.3870152), (-81.7522887,29.386112699999998), (-81.7522821,29.3852103), (-81.7522754,29.3843078), (-81.7522688,29.3834053), (-81.7522621,29.3825029), (-81.7522555,29.3816004), (-81.7515768,29.4294368), (-81.7515702,29.4285344), (-81.7515636,29.4276319), (-81.7515569,29.4267294), (-81.7515503,29.425827), (-81.7515436,29.4249245), (-81.751537,29.424022), (-81.7515303,29.4231196), (-81.7515237,29.4222171), (-81.7515171,29.4213147), (-81.7515104,29.4204122), (-81.7515038,29.4195097), (-81.7514971,29.4186073), (-81.7514905,29.4177048), (-81.7514839,29.4168023), (-81.7514772,29.4158999), (-81.7514706,29.4149974), (-81.7514639,29.414095), 
(-81.7514573,29.4131925), (-81.7514507,29.41229), (-81.751444,29.4113876), (-81.7514374,29.4104851), (-81.7514307,29.4095826), (-81.7514241,29.4086802), (-81.7514175,29.4077777), (-81.7514108,29.4068753), (-81.7514042,29.4059728), (-81.7513975,29.4050703), (-81.7513909,29.4041679), (-81.7513843,29.4032654), (-81.7513776,29.4023629), (-81.751371,29.4014605), (-81.7513644,29.400558), (-81.7513577,29.3996555), (-81.7513511,29.3987531), (-81.7513445,29.3978506), (-81.7513378,29.3969481), (-81.7513312,29.3960457), (-81.7513246,29.3951432), (-81.7513179,29.3942407), (-81.7513113,29.3933383), (-81.7513047,29.3924358), (-81.751298,29.3915333), (-81.7512914,29.3906309), (-81.7512848,29.3897284), (-81.7512781,29.3888259), (-81.7512715,29.3879235), (-81.7512649,29.387021), (-81.7512582,29.3861185), (-81.7512516,29.3852161), (-81.751245,29.3843136), (-81.7512384,29.3834111), (-81.7512317,29.3825087), (-81.7512251,29.3816062); + +select count(), sum(pointInPolygon((x, y),[(-82.311206, 28.960046), (-81.658722, 28.960345), (-81.65921, 29.047816), (-81.638697, 29.047761), (-81.641902, 29.276992), (-81.680709, 29.323243), (-81.675429, 29.338641), (-81.656377, 29.337981), (-81.668564, 29.371704), (-81.741634, 29.371123), (-81.741671, 29.429891), (-81.774905, 29.429249), (-81.776206, 29.487448), (-81.842765, 29.486042), (-81.844929, 29.521679), (-81.912052, 29.503906), (-81.93199, 29.51639), (-82.03174, 29.492385), (-82.055896, 29.471639), (-82.056748, 29.439952), (-82.080922, 29.440351), (-82.099886, 29.418665), (-82.101119, 29.438298), (-82.134555, 29.434982), (-82.148231, 29.417245), (-82.211144, 29.429693), (-82.212727, 29.465624), (-82.188, 29.456239), (-82.211448, 29.484017), (-82.40662, 29.485049), (-82.403059, 29.215536), (-82.534829, 29.2147), (-82.535191, 29.041186), (-82.471915, 29.052551), (-82.418649, 29.013144), (-82.401814, 29.019004), (-82.365594, 29.00246), (-82.311206, 28.960046)])) from test.coords; +drop table if exists test.coords; From 55d82dfc84c92f9ef9c828357b84838c22cb9267 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Tue, 21 Aug 2018 17:03:06 +0300 Subject: [PATCH 043/192] fix race in initialization of ReplicatedMergeTreeRestartingThread [#CLICKHOUSE-3899] --- .../ReplicatedMergeTreeRestartingThread.cpp | 44 +++++-------------- .../ReplicatedMergeTreeRestartingThread.h | 9 +++- .../Storages/StorageReplicatedMergeTree.cpp | 22 ++++++---- .../src/Storages/StorageReplicatedMergeTree.h | 3 +- 4 files changed, 35 insertions(+), 43 deletions(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 0b63a52e83d..bb97aabe691 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -52,34 +52,6 @@ ReplicatedMergeTreeRestartingThread::ReplicatedMergeTreeRestartingThread(Storage check_period_ms = storage.data.settings.check_delay_period * 1000; task = storage.context.getSchedulePool().createTask(log_name, [this]{ run(); }); - task->schedule(); -} - -ReplicatedMergeTreeRestartingThread::~ReplicatedMergeTreeRestartingThread() -{ - try - { - /// Stop restarting_thread before stopping other tasks - so that it won't restart them again. - need_stop = true; - task->deactivate(); - LOG_TRACE(log, "Restarting thread finished"); - - /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. 
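/// The lifecycle problem being fixed: the constructor above now only creates the task,
/// where it previously also called task->schedule(), so the restarting task could begin
/// running before the owning StorageReplicatedMergeTree had finished its own startup
/// (hence the "Can be called before starting restarting_thread" check removed further
/// below). The destructor deleted here is replaced by an explicit shutdown() method,
/// and scheduling moves into an explicit start(), both driven by the storage:
///
///     storage.startup();    // adds queue_task_handle, then restarting_thread.start()
///     ...
///     storage.shutdown();   // cancels blockers, restarting_thread.shutdown(), removes queue task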
- storage.fetcher.blocker.cancelForever(); - storage.merger_mutator.actions_blocker.cancelForever(); - - /// Stop other tasks. - - partialShutdown(); - - if (storage.queue_task_handle) - storage.context.getBackgroundPool().removeTask(storage.queue_task_handle); - storage.queue_task_handle.reset(); - } - catch (...) - { - tryLogCurrentException(log, __PRETTY_FUNCTION__); - } } void ReplicatedMergeTreeRestartingThread::run() @@ -215,10 +187,6 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() storage.alter_thread.start(); storage.part_check_thread.start(); - if (!storage.queue_task_handle) - storage.queue_task_handle = storage.context.getBackgroundPool().addTask( - std::bind(&StorageReplicatedMergeTree::queueTask, &storage)); - return true; } catch (...) @@ -368,4 +336,16 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() LOG_TRACE(log, "Threads finished"); } + +void ReplicatedMergeTreeRestartingThread::shutdown() +{ + /// Stop restarting_thread before stopping other tasks - so that it won't restart them again. + need_stop = true; + task->deactivate(); + LOG_TRACE(log, "Restarting thread finished"); + + /// Stop other tasks. + partialShutdown(); +} + } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 1192ec45703..28314a7d2c1 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -23,10 +23,17 @@ class ReplicatedMergeTreeRestartingThread { public: ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - ~ReplicatedMergeTreeRestartingThread(); + + void start() + { + task->activate(); + task->schedule(); + } void wakeup() { task->schedule(); } + void shutdown(); + private: StorageReplicatedMergeTree & storage; String log_name; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 586405acf75..1ac71665580 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -215,7 +215,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( [this] (const std::string & name) { enqueuePartForCheck(name); }), reader(data), writer(data), merger_mutator(data, context.getBackgroundPool()), queue(*this), fetcher(data), - cleanup_thread(*this), alter_thread(*this), part_check_thread(*this), + cleanup_thread(*this), alter_thread(*this), part_check_thread(*this), restarting_thread(*this), log(&Logger::get(database_name + "." + table_name + " (StorageReplicatedMergeTree)")) { if (path_.empty()) @@ -2063,9 +2063,7 @@ void StorageReplicatedMergeTree::queueUpdatingTask() if (e.code == ZooKeeperImpl::ZooKeeper::ZSESSIONEXPIRED) { - /// Can be called before starting restarting_thread - if (restarting_thread) - restarting_thread->wakeup(); + restarting_thread.wakeup(); return; } @@ -2787,8 +2785,10 @@ void StorageReplicatedMergeTree::startup() data_parts_exchange_endpoint_holder = std::make_shared( data_parts_exchange_endpoint->getId(replica_path), data_parts_exchange_endpoint, context.getInterserverIOHandler()); + queue_task_handle = context.getBackgroundPool().addTask([this] { return queueTask(); }); + /// In this thread replica will be activated. 
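/// restarting_thread also changes from a std::unique_ptr to a by-value member initialized
/// in the storage constructor (see the constructor diff above), so startup() below only
/// has to start it, and callers such as queueUpdatingTask() can invoke
/// restarting_thread.wakeup() without first checking the pointer for null.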
- restarting_thread = std::make_unique(*this); + restarting_thread.start(); /// Wait while restarting_thread initializes LeaderElection (and so on) or makes first attmept to do it startup_event.wait(); @@ -2797,15 +2797,21 @@ void StorageReplicatedMergeTree::startup() void StorageReplicatedMergeTree::shutdown() { - restarting_thread.reset(); + /// Cancel fetches, merges and mutations to force the queue_task to finish ASAP. + fetcher.blocker.cancelForever(); + merger_mutator.actions_blocker.cancelForever(); + + restarting_thread.shutdown(); + + if (queue_task_handle) + context.getBackgroundPool().removeTask(queue_task_handle); + queue_task_handle.reset(); if (data_parts_exchange_endpoint_holder) { data_parts_exchange_endpoint_holder->getBlocker().cancelForever(); data_parts_exchange_endpoint_holder = nullptr; } - - fetcher.blocker.cancelForever(); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.h b/dbms/src/Storages/StorageReplicatedMergeTree.h index ebe28524910..180f893e56c 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.h +++ b/dbms/src/Storages/StorageReplicatedMergeTree.h @@ -198,7 +198,6 @@ private: void clearOldPartsAndRemoveFromZK(); friend class ReplicatedMergeTreeBlockOutputStream; - friend class ReplicatedMergeTreeRestartingThread; friend class ReplicatedMergeTreePartCheckThread; friend class ReplicatedMergeTreeCleanupThread; friend class ReplicatedMergeTreeAlterThread; @@ -303,7 +302,7 @@ private: ReplicatedMergeTreePartCheckThread part_check_thread; /// A thread that processes reconnection to ZooKeeper when the session expires. - std::unique_ptr restarting_thread; + ReplicatedMergeTreeRestartingThread restarting_thread; /// An event that awakens `alter` method from waiting for the completion of the ALTER query. zkutil::EventPtr alter_query_event = std::make_shared(); From 726428a2c5e42e426720dde8a695fa829670b4e4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 21 Aug 2018 18:24:20 +0300 Subject: [PATCH 044/192] Add timeout to socket --- dbms/src/Common/TaskStatsInfoGetter.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 8f4d1c6c4ee..a9e5f6f636a 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -139,7 +139,13 @@ void TaskStatsInfoGetter::init() if (netlink_socket_fd >= 0) return; + struct timeval tv; + tv.tv_sec = 5; + tv.tv_usec = 0; + netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv)); + if (netlink_socket_fd < 0) throwFromErrno("Can't create PF_NETLINK socket"); From df012657e84ac2083ee118a3a31ace5589204206 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 21 Aug 2018 18:40:19 +0300 Subject: [PATCH 045/192] Change time to 50 mseconds --- dbms/src/Common/TaskStatsInfoGetter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index a9e5f6f636a..3e65e5e0007 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -140,8 +140,8 @@ void TaskStatsInfoGetter::init() return; struct timeval tv; - tv.tv_sec = 5; - tv.tv_usec = 0; + tv.tv_sec = 0; + tv.tv_usec = 50000; netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv)); From 
fc2925b567febbebd2f9b41c73949f29462fb87f Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 21 Aug 2018 18:56:50 +0300 Subject: [PATCH 046/192] Squashed commit of the following: commit 2722e540abfee4a527d716474c4ca582eceeee08 Merge: b4f3af824 aebddd550 Author: proller Date: Tue Aug 21 18:34:18 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit b4f3af824b88a8b6826583bb483730827574e8ad Author: proller Date: Tue Aug 21 17:00:20 2018 +0300 fix commit 3a18fa7ded2a7d6b7a0883a1b9c2d6b99360837d Author: proller Date: Tue Aug 21 16:57:17 2018 +0300 fix commit 5d42220c2cf47e1a86bdc73dbfc24b68f8626891 Author: proller Date: Tue Aug 21 16:50:21 2018 +0300 freebsd fixes commit 7baf4fb5ba4575f79b0d12a9ffaaabd34e1006da Author: proller Date: Tue Aug 21 16:17:19 2018 +0300 fix commit e1fe707fd765b841a8d0952d91a980128cbf91d0 Author: proller Date: Tue Aug 21 15:35:21 2018 +0300 fix commit 027887c71b3ffa98d9473d50d8c3b79cbf3304ac Author: proller Date: Tue Aug 21 15:25:57 2018 +0300 fix commit 81af41bfcfff7c02fe4060196cf03c2d2aab416e Author: proller Date: Tue Aug 21 15:20:41 2018 +0300 fix commit 93d572c85d9c7e331254999a614c3b22b5573b02 Author: proller Date: Tue Aug 21 14:58:07 2018 +0300 SPLIT_SHARED commit dd5b8990d8527d59b1e890943d80c2bf27c613ce Author: proller Date: Tue Aug 21 14:52:39 2018 +0300 fix commit 4840ca12ab752a38c1ef482e8ec59c5859bb48d7 Author: proller Date: Tue Aug 21 14:46:31 2018 +0300 fix commit abde633beb86f2a0d025d6fcf079965dbd827b92 Author: proller Date: Tue Aug 21 14:25:32 2018 +0300 fix commit 0d94a5476a5ba8ba5e88638d58f2cfbf2b4b662d Author: proller Date: Tue Aug 21 14:45:18 2018 +0300 Travis: try fail on ninja fail commit c2686f90b68255c2beb0a708804aef404e80a6d2 Merge: 2c3427bbd 2aa7eb463 Author: proller Date: Tue Aug 21 14:25:15 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 2c3427bbdb861edbb188ed4621e8a05cafaedafb Author: proller Date: Mon Aug 20 23:57:24 2018 +0300 fix commit 530170c6a81d31dcfa81230e48520383234df4bc Merge: 9abec162c f6e4ec970 Author: proller Date: Mon Aug 20 23:57:03 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 9abec162cb2e09bbc2f33cbe80fe76791f6e5a77 Author: proller Date: Mon Aug 20 23:49:58 2018 +0300 Apple fixes commit 36d05e8217440fbc8ae21571b06d4eb6d679d538 Author: proller Date: Mon Aug 20 23:25:05 2018 +0300 apple fix commit aeec3e845e4456e89fbb1b1af6f9f36820a46e33 Author: proller Date: Mon Aug 20 23:20:06 2018 +0300 fixes commit 427961d916a5954981e47d94733996deb2a616ce Author: proller Date: Mon Aug 20 23:11:11 2018 +0300 fix commit a7dd55ff8f653624c0f3dbcbc54defd3b3ae97af Author: proller Date: Mon Aug 20 22:41:53 2018 +0300 fix commit 6200e0d315c7a62bae63a8de0fc32f7937770ad2 Merge: 8a541d7e6 21cedbe46 Author: proller Date: Mon Aug 20 22:35:49 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 8a541d7e64c89e2c16af6c909e0353361153aaa3 Author: proller Date: Mon Aug 20 22:34:32 2018 +0300 Do not use poco types commit fd560f43d048b7e3307c6c6b9c9d9918230014d8 Author: proller Date: Mon Aug 20 22:20:42 2018 +0300 Try fix apple build commit cfb2eba07ac06f19e822d3474341d800b1f98cf1 Merge: 8d2e31c90 5b81fdfc0 Author: proller Date: Mon Aug 20 22:03:24 2018 +0300 Merge remote-tracking branch 'upstream/master' into fix3 commit 8d2e31c908be2e99d09e2a9dde2414ab82a5e93c Author: proller Date: Mon Aug 20 21:56:06 2018 +0300 travis: TEST_SERVER_STARTUP_WAIT=10 --- contrib/boost-cmake/CMakeLists.txt | 8 +++-- contrib/cctz-cmake/CMakeLists.txt | 2 +- 
contrib/jemalloc-cmake/CMakeLists.txt | 2 +- contrib/librdkafka-cmake/CMakeLists.txt | 2 +- contrib/unixodbc-cmake/CMakeLists.txt | 4 +-- contrib/zstd-cmake/CMakeLists.txt | 2 +- dbms/src/Common/Stopwatch.cpp | 5 +++- dbms/src/Common/TaskStatsInfoGetter.cpp | 30 +++++++++++++++++++ dbms/src/Common/TaskStatsInfoGetter.h | 5 ++++ dbms/src/Common/ThreadProfileEvents.h | 14 +++++++-- dbms/src/Common/ThreadStatus.cpp | 2 ++ dbms/src/Common/hex.h | 1 + dbms/src/Common/setThreadName.cpp | 12 +++++--- .../Embedded/GeodataProviders/Entries.h | 1 + .../Dictionaries/MongoDBBlockInputStream.cpp | 4 +-- dbms/src/Functions/FunctionsHashing.h | 2 +- dbms/src/IO/LimitReadBuffer.h | 4 +-- .../src/Interpreters/tests/internal_iotop.cpp | 5 ++++ dbms/tests/clickhouse-test-server | 2 +- libs/libcommon/include/common/DateLUTImpl.h | 2 +- libs/libcommon/include/common/Types.h | 20 ++++++------- libs/libmysqlxx/include/mysqlxx/Types.h | 10 +++---- utils/build/build_macos.sh | 2 +- utils/travis/normal.sh | 5 ++-- 24 files changed, 105 insertions(+), 41 deletions(-) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 7e2379c5738..493a28d1338 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -16,7 +16,7 @@ if (NOT MSVC) add_definitions(-Wno-unused-variable -Wno-deprecated-declarations) endif () -add_library(boost_program_options_internal +add_library(boost_program_options_internal ${SPLIT_SHARED} ${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp ${LIBRARY_DIR}/libs/program_options/src/config_file.cpp ${LIBRARY_DIR}/libs/program_options/src/convert.cpp @@ -29,7 +29,7 @@ ${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp ${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp ${LIBRARY_DIR}/libs/program_options/src/winmain.cpp) -add_library(boost_filesystem_internal +add_library(boost_filesystem_internal ${SPLIT_SHARED} ${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp ${LIBRARY_DIR}/libs/filesystem/src/operations.cpp ${LIBRARY_DIR}/libs/filesystem/src/path.cpp @@ -39,9 +39,11 @@ ${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp ${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp ${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp) -add_library(boost_system_internal +add_library(boost_system_internal ${SPLIT_SHARED} ${LIBRARY_DIR}/libs/system/src/error_code.cpp) +target_link_libraries (boost_filesystem_internal PRIVATE boost_system_internal) + target_include_directories (boost_program_options_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories (boost_filesystem_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories (boost_system_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index 9c2f6d9a658..9d996486de7 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/cctz) -add_library(cctz +add_library(cctz ${SPLIT_SHARED} ${LIBRARY_DIR}/src/civil_time_detail.cc ${LIBRARY_DIR}/src/time_zone_fixed.cc ${LIBRARY_DIR}/src/time_zone_format.cc diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index d60d34604a9..95d04853a41 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -40,7 +40,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND SRCS ${JEMALLOC_SOURCE_DIR}/src/zone.c) endif() 
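# The ${SPLIT_SHARED} substitution applied to the contrib libraries in this patch is
# assumed to be defined by the top-level build: it expands to SHARED when a split
# shared-library build is requested and to nothing otherwise, so add_library() falls
# back to its default (static) type. A minimal sketch of the convention:
#   set(SPLIT_SHARED SHARED)   # set for split builds; leave unset for static builds
#   add_library(jemalloc ${SPLIT_SHARED} ${SRCS})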
-add_library(jemalloc STATIC ${SRCS}) +add_library(jemalloc ${SPLIT_SHARED} ${SRCS}) target_include_directories(jemalloc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 7211c791b2f..0bae6f579ad 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -54,7 +54,7 @@ ${RDKAFKA_SOURCE_DIR}/lz4hc.c ${RDKAFKA_SOURCE_DIR}/rdgz.c ) -add_library(rdkafka STATIC ${SRCS}) +add_library(rdkafka ${SPLIT_SHARED} ${SRCS}) target_include_directories(rdkafka PRIVATE include) target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index 4f9f6b41538..de04dd20a87 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -23,7 +23,7 @@ ${ODBC_SOURCE_DIR}/libltdl/loaders/preopen.c ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/libltdl/libltdlcS.c ) -add_library(ltdl STATIC ${SRCS}) +add_library(ltdl ${SPLIT_SHARED} ${SRCS}) target_include_directories(ltdl PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/libltdl) target_include_directories(ltdl PUBLIC ${ODBC_SOURCE_DIR}/libltdl) @@ -273,7 +273,7 @@ ${ODBC_SOURCE_DIR}/lst/lstSetFreeFunc.c ${ODBC_SOURCE_DIR}/lst/_lstVisible.c ) -add_library(unixodbc STATIC ${SRCS}) +add_library(unixodbc ${SPLIT_SHARED} ${SRCS}) target_link_libraries(unixodbc ltdl) diff --git a/contrib/zstd-cmake/CMakeLists.txt b/contrib/zstd-cmake/CMakeLists.txt index 6d8ecce2a44..64bb36c6b1d 100644 --- a/contrib/zstd-cmake/CMakeLists.txt +++ b/contrib/zstd-cmake/CMakeLists.txt @@ -125,6 +125,6 @@ IF (ZSTD_LEGACY_SUPPORT) ${LIBRARY_LEGACY_DIR}/zstd_v07.h) ENDIF (ZSTD_LEGACY_SUPPORT) -ADD_LIBRARY(zstd ${Sources} ${Headers}) +ADD_LIBRARY(zstd ${SPLIT_SHARED} ${Sources} ${Headers}) target_include_directories (zstd PUBLIC ${LIBRARY_DIR}) diff --git a/dbms/src/Common/Stopwatch.cpp b/dbms/src/Common/Stopwatch.cpp index b6d7092c054..ef48d910135 100644 --- a/dbms/src/Common/Stopwatch.cpp +++ b/dbms/src/Common/Stopwatch.cpp @@ -6,8 +6,11 @@ StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current() StopwatchRUsage::Timestamp res; ::rusage rusage; +#if defined(__APPLE__) + ::getrusage(RUSAGE_SELF, &rusage); // TODO FIXME! need rusage_thread +#else ::getrusage(RUSAGE_THREAD, &rusage); - +#endif res.user_ns = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL; res.sys_ns = rusage.ru_stime.tv_sec * 1000000000UL + rusage.ru_stime.tv_usec * 1000UL; return res; diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 8f4d1c6c4ee..3caed0c07d1 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -3,16 +3,24 @@ #include #include +#if defined(__linux__) #include #include #include +#endif + #include #include #include #include #include #include + +#if __has_include() +#include +#else #include +#endif /// Basic idea is motivated by "iotop" tool. 
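/// The exchange with the kernel implemented in this file is, in outline:
///   int fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
///   ::bind(fd, ...);                          // ::sockaddr_nl with nl_family = AF_NETLINK
///   UInt16 family = getFamilyId(fd);          // CTRL_CMD_GETFAMILY lookup of "TASKSTATS"
///   sendCommand(fd, family, tid, TASKSTATS_CMD_GET,
///               TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t));
///   ::recv(fd, &msg, sizeof(msg), 0);         // reply carries ::taskstats in nested attributes
/// On non-Linux platforms all of the above is compiled out by the #if defined(__linux__) guards.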
/// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt @@ -40,9 +48,11 @@ static size_t constexpr MAX_MSG_SIZE = 1024; struct NetlinkMessage { +#if defined(__linux__) ::nlmsghdr n; ::genlmsghdr g; char buf[MAX_MSG_SIZE]; +#endif }; @@ -55,6 +65,7 @@ int sendCommand( void * nla_data, int nla_len) noexcept { +#if defined(__linux__) NetlinkMessage msg{}; msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); @@ -93,6 +104,7 @@ int sendCommand( else if (errno != EAGAIN) return -1; } +#endif return 0; } @@ -100,6 +112,7 @@ int sendCommand( UInt16 getFamilyId(int nl_sock_fd) noexcept { +#if defined(__linux__) struct { ::nlmsghdr header; @@ -127,6 +140,9 @@ UInt16 getFamilyId(int nl_sock_fd) noexcept id = *static_cast(NLA_DATA(attr)); return id; +#else + return 0; +#endif } } @@ -136,6 +152,7 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() = default; void TaskStatsInfoGetter::init() { +#if defined(__linux__) if (netlink_socket_fd >= 0) return; @@ -150,8 +167,11 @@ void TaskStatsInfoGetter::init() throwFromErrno("Can't bind PF_NETLINK socket"); netlink_family_id = getFamilyId(netlink_socket_fd); +#endif } + +#if defined(__linux__) bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool throw_on_error) { init(); @@ -216,6 +236,7 @@ bool TaskStatsInfoGetter::tryGetStat(::taskstats & stat, int tid) tid = tid < 0 ? getDefaultTID() : tid; return getStatImpl(tid, stat, false); } +#endif TaskStatsInfoGetter::~TaskStatsInfoGetter() { @@ -225,8 +246,12 @@ TaskStatsInfoGetter::~TaskStatsInfoGetter() int TaskStatsInfoGetter::getCurrentTID() { +#if defined(__linux__) /// This call is always successful. - man gettid return static_cast(syscall(SYS_gettid)); +#else + return 0; +#endif } int TaskStatsInfoGetter::getDefaultTID() @@ -239,9 +264,13 @@ int TaskStatsInfoGetter::getDefaultTID() static bool tryGetTaskStats() { +#if defined(__linux__) TaskStatsInfoGetter getter; ::taskstats stat; return getter.tryGetStat(stat); +#else + return false; +#endif } bool TaskStatsInfoGetter::checkProcessHasRequiredPermissions() @@ -251,4 +280,5 @@ bool TaskStatsInfoGetter::checkProcessHasRequiredPermissions() return res; } + } diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index c89194cf88a..8d256f07ec1 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -17,8 +17,10 @@ public: TaskStatsInfoGetter(); TaskStatsInfoGetter(const TaskStatsInfoGetter &) = delete; +#if defined(__linux__) void getStat(::taskstats & stat, int tid = -1); bool tryGetStat(::taskstats & stat, int tid = -1); +#endif ~TaskStatsInfoGetter(); @@ -33,7 +35,10 @@ private: int getDefaultTID(); int default_tid = -1; +#if defined(__linux__) bool getStatImpl(int tid, ::taskstats & out_stats, bool throw_on_error = false); +#endif + void init(); int netlink_socket_fd = -1; diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 3a780f509a7..0edbe46c43b 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -5,8 +5,10 @@ #include #include #include -#include +#if defined(__linux__) +#include +#endif namespace ProfileEvents { @@ -83,7 +85,12 @@ struct RUsageCounters static RUsageCounters current(UInt64 real_time_ = getCurrentTimeNanoseconds()) { ::rusage rusage; +#if defined(__APPLE__) + ::getrusage(RUSAGE_SELF, &rusage); +#else ::getrusage(RUSAGE_THREAD, &rusage); +#endif + return RUsageCounters(rusage, real_time_); } @@ -108,14 +115,16 @@ struct RUsageCounters 
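/// ::taskstats fields are cumulative per-thread counters, so the profile events below are
/// computed as deltas between two snapshots, e.g. in incrementProfileEvents():
///   profile_events.increment(ProfileEvents::OSCPUWaitMicroseconds,
///       safeDiff(prev.stat.cpu_delay_total, curr.stat.cpu_delay_total) / 1000U);
/// With the new __linux__ guard the struct carries no payload on other platforms and the
/// increments compile to no-ops.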
struct TasksStatsCounters { +#if defined(__linux__) ::taskstats stat; - +#endif TasksStatsCounters() = default; static TasksStatsCounters current(); static void incrementProfileEvents(const TasksStatsCounters & prev, const TasksStatsCounters & curr, ProfileEvents::Counters & profile_events) { +#if defined(__linux__) profile_events.increment(ProfileEvents::OSCPUWaitMicroseconds, safeDiff(prev.stat.cpu_delay_total, curr.stat.cpu_delay_total) / 1000U); profile_events.increment(ProfileEvents::OSIOWaitMicroseconds, @@ -131,6 +140,7 @@ struct TasksStatsCounters profile_events.increment(ProfileEvents::OSWriteChars, safeDiff(prev.stat.write_char, curr.stat.write_char)); profile_events.increment(ProfileEvents::OSReadBytes, safeDiff(prev.stat.read_bytes, curr.stat.read_bytes)); profile_events.increment(ProfileEvents::OSWriteBytes, safeDiff(prev.stat.write_bytes, curr.stat.write_bytes)); +#endif } static void updateProfileEvents(TasksStatsCounters & last_counters, ProfileEvents::Counters & profile_events) diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index d4cca1b326c..34b88a466ee 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -27,7 +27,9 @@ thread_local ThreadStatus::CurrentThreadScope current_thread_scope; TasksStatsCounters TasksStatsCounters::current() { TasksStatsCounters res; +#if __linux__ current_thread->taskstats_getter->getStat(res.stat, current_thread->os_thread_id); +#endif return res; } diff --git a/dbms/src/Common/hex.h b/dbms/src/Common/hex.h index 071ee60029e..f5ca4904b8c 100644 --- a/dbms/src/Common/hex.h +++ b/dbms/src/Common/hex.h @@ -1,4 +1,5 @@ #pragma once +#include /// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. diff --git a/dbms/src/Common/setThreadName.cpp b/dbms/src/Common/setThreadName.cpp index d92b2e84715..c5c4fd54e08 100644 --- a/dbms/src/Common/setThreadName.cpp +++ b/dbms/src/Common/setThreadName.cpp @@ -39,13 +39,17 @@ std::string getThreadName() { std::string name(16, '\0'); -#if defined(__FreeBSD__) || defined(__APPLE__) - if (pthread_get_name_np(pthread_self(), name.data(), name.size()); - throw DB::Exception("Cannot get thread name with pthread_get_name_np()", DB::ErrorCodes::PTHREAD_ERROR); +#if defined(__APPLE__) + if (pthread_getname_np(pthread_self(), name.data(), name.size())); + throw DB::Exception("Cannot get thread name with pthread_getname_np()", DB::ErrorCodes::PTHREAD_ERROR); +#elif defined(__FreeBSD__) +// TODO: make test. 
freebsd will have this function soon https://freshbsd.org/commit/freebsd/r337983 +// if (pthread_get_name_np(pthread_self(), name.data(), name.size())); +// throw DB::Exception("Cannot get thread name with pthread_get_name_np()", DB::ErrorCodes::PTHREAD_ERROR); #else if (0 != prctl(PR_GET_NAME, name.data(), 0, 0, 0)) -#endif DB::throwFromErrno("Cannot get thread name with prctl(PR_GET_NAME)"); +#endif name.resize(std::strlen(name.data())); return name; diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h index 9e6b253266f..8e68095fc65 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/Entries.h @@ -1,6 +1,7 @@ #pragma once #include +#include struct RegionEntry { diff --git a/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp b/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp index e6d7f54c818..89ae0d0357c 100644 --- a/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp +++ b/dbms/src/Dictionaries/MongoDBBlockInputStream.cpp @@ -56,8 +56,8 @@ namespace case Poco::MongoDB::ElementTraits::TypeId: static_cast &>(column).getData().push_back(static_cast &>(value).value()); break; - case Poco::MongoDB::ElementTraits::TypeId: - static_cast &>(column).getData().push_back(static_cast &>(value).value()); + case Poco::MongoDB::ElementTraits::TypeId: + static_cast &>(column).getData().push_back(static_cast &>(value).value()); break; case Poco::MongoDB::ElementTraits::TypeId: static_cast &>(column).getData().push_back(static_cast &>(value).value()); diff --git a/dbms/src/Functions/FunctionsHashing.h b/dbms/src/Functions/FunctionsHashing.h index 3014d4205a2..4432c520da7 100644 --- a/dbms/src/Functions/FunctionsHashing.h +++ b/dbms/src/Functions/FunctionsHashing.h @@ -72,7 +72,7 @@ struct HalfMD5Impl union { unsigned char char_data[16]; - Poco::UInt64 uint64_data; + uint64_t uint64_data; } buf; MD5_CTX ctx; diff --git a/dbms/src/IO/LimitReadBuffer.h b/dbms/src/IO/LimitReadBuffer.h index 31ec037ff73..a03993e9fa5 100644 --- a/dbms/src/IO/LimitReadBuffer.h +++ b/dbms/src/IO/LimitReadBuffer.h @@ -13,14 +13,14 @@ class LimitReadBuffer : public ReadBuffer { private: ReadBuffer & in; - UInt64 limit; + size_t limit; bool throw_exception; std::string exception_message; bool nextImpl() override; public: - LimitReadBuffer(ReadBuffer & in, UInt64 limit, bool throw_exception, std::string exception_message = {}); + LimitReadBuffer(ReadBuffer & in, size_t limit, bool throw_exception, std::string exception_message = {}); ~LimitReadBuffer() override; }; diff --git a/dbms/src/Interpreters/tests/internal_iotop.cpp b/dbms/src/Interpreters/tests/internal_iotop.cpp index 536a2b3d0ae..ca6cd0ce5a3 100644 --- a/dbms/src/Interpreters/tests/internal_iotop.cpp +++ b/dbms/src/Interpreters/tests/internal_iotop.cpp @@ -7,7 +7,10 @@ #include #include +#if defined(__linux__) #include +#endif + #include #include #include @@ -48,6 +51,7 @@ using namespace DB; void do_io(size_t id) { +#if defined(__linux__) ::taskstats stat; int tid = TaskStatsInfoGetter::getCurrentTID(); TaskStatsInfoGetter get_info; @@ -104,6 +108,7 @@ void do_io(size_t id) } Poco::File(path_dst).remove(false); +#endif } void test_perf() diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server index 3845da2a93f..79ef37c5e34 100755 --- a/dbms/tests/clickhouse-test-server +++ b/dbms/tests/clickhouse-test-server @@ -77,7 +77,7 @@ $GDB ${BIN_DIR}clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- \ 
$INTERNAL_COMPILER_PARAMS \ > $LOG_DIR/stdout 2>&1 & CH_PID=$! -sleep ${TEST_SERVER_STARTUP_WAIT:=5} +sleep ${TEST_SERVER_STARTUP_WAIT:=10} if [ "$GDB" ]; then # Long symbols read diff --git a/libs/libcommon/include/common/DateLUTImpl.h b/libs/libcommon/include/common/DateLUTImpl.h index 3a57e1396f5..ab04b7cbcbf 100644 --- a/libs/libcommon/include/common/DateLUTImpl.h +++ b/libs/libcommon/include/common/DateLUTImpl.h @@ -3,8 +3,8 @@ #include #include #include - #include +#include #define DATE_LUT_MAX (0xFFFFFFFFU - 86400) #define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400) diff --git a/libs/libcommon/include/common/Types.h b/libs/libcommon/include/common/Types.h index fa110da7af3..a6bfcc6ae31 100644 --- a/libs/libcommon/include/common/Types.h +++ b/libs/libcommon/include/common/Types.h @@ -1,16 +1,16 @@ #pragma once +#include +#include -#include +using Int8 = int8_t; +using Int16 = int16_t; +using Int32 = int32_t; +using Int64 = int64_t; -using Int8 = Poco::Int8; -using Int16 = Poco::Int16; -using Int32 = Poco::Int32; -using Int64 = Poco::Int64; - -using UInt8 = Poco::UInt8; -using UInt16 = Poco::UInt16; -using UInt32 = Poco::UInt32; -using UInt64 = Poco::UInt64; +using UInt8 = uint8_t; +using UInt16 = uint16_t; +using UInt32 = uint32_t; +using UInt64 = uint64_t; /** This is not the best way to overcome an issue of different definitions diff --git a/libs/libmysqlxx/include/mysqlxx/Types.h b/libs/libmysqlxx/include/mysqlxx/Types.h index c64487d126d..30abdeb9fd0 100644 --- a/libs/libmysqlxx/include/mysqlxx/Types.h +++ b/libs/libmysqlxx/include/mysqlxx/Types.h @@ -1,7 +1,7 @@ #pragma once +#include #include -#include struct st_mysql; using MYSQL = st_mysql; @@ -19,10 +19,10 @@ using MYSQL_FIELD = st_mysql_field; namespace mysqlxx { -using UInt64 = Poco::UInt64; -using Int64 = Poco::Int64; -using UInt32 = Poco::UInt32; -using Int32 = Poco::Int32; +using UInt64 = uint64_t; +using Int64 = int64_t; +using UInt32 = uint32_t; +using Int32 = int32_t; using MYSQL_LENGTH = unsigned long; using MYSQL_LENGTHS = MYSQL_LENGTH *; diff --git a/utils/build/build_macos.sh b/utils/build/build_macos.sh index 462dd3e528e..b6f9f842e72 100755 --- a/utils/build/build_macos.sh +++ b/utils/build/build_macos.sh @@ -12,7 +12,7 @@ fi ## Install required compilers, tools, libraries -brew install cmake gcc icu4c mariadb-connector-c openssl unixodbc libtool gettext readline +brew install cmake gcc icu4c mariadb-connector-c openssl unixodbc libtool gettext readline librdkafka ## Checkout ClickHouse sources diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index 2824a17c5c9..dd901a815f5 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -33,11 +33,12 @@ cmake $CUR_DIR/../.. -DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL -DUNBUNDLED=1 \ `# Disable all features` \ -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ - && ninja clickhouse-bundle \ +ninja clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ `# 00428 requires sudo (not all vms allow this)` \ `# 00385 runs infinitly (TODO: fix it)` \ - && ( [ ! ${TEST_RUN=1} ] || ( ( cd $CUR_DIR/../.. && env TEST_OPT="--skip long compile 00428 00385 $TEST_OPT" TEST_PORT_RANDOM= TEST_PERF= bash -x dbms/tests/clickhouse-test-server ) || ${TEST_TRUE=false} ) ) + +[ ! ${TEST_RUN=1} ] || ( ( cd $CUR_DIR/../.. 
&& env TEST_OPT="--skip long compile 00428 00385 $TEST_OPT" TEST_PORT_RANDOM= TEST_PERF= TEST_SERVER_STARTUP_WAIT=10 bash -x dbms/tests/clickhouse-test-server ) || ${TEST_TRUE=false} ) date From 46f00c37e47320b585e99ddd7404c64a684412d1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 21 Aug 2018 19:13:07 +0300 Subject: [PATCH 047/192] Update TaskStatsInfoGetter.cpp --- dbms/src/Common/TaskStatsInfoGetter.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 3e65e5e0007..204b86750b4 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -144,10 +144,11 @@ void TaskStatsInfoGetter::init() tv.tv_usec = 50000; netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); - ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv)); - if (netlink_socket_fd < 0) throwFromErrno("Can't create PF_NETLINK socket"); + + if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) + throwFromErrno("Can't set timeout on PF_NETLINK socket"); ::sockaddr_nl addr{}; addr.nl_family = AF_NETLINK; From 1a2b3bc9853f8d037c241751b462b68c16e314fb Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 21 Aug 2018 19:17:27 +0300 Subject: [PATCH 048/192] fix --- contrib/jemalloc-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 95d04853a41..d60d34604a9 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -40,7 +40,7 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND SRCS ${JEMALLOC_SOURCE_DIR}/src/zone.c) endif() -add_library(jemalloc ${SPLIT_SHARED} ${SRCS}) +add_library(jemalloc STATIC ${SRCS}) target_include_directories(jemalloc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include From 87de8db71769f0df97409fac86f8525ab8b96f68 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 21 Aug 2018 19:34:05 +0300 Subject: [PATCH 049/192] fix --- utils/travis/normal.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index dd901a815f5..40aeca33424 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -33,6 +33,7 @@ cmake $CUR_DIR/../.. 
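# Why the one-line fix below is needed: patch 046 left the cmake invocation ending in
# "$CMAKE_FLAGS \", so the trailing backslash made the shell swallow the following
# "ninja clickhouse-bundle" line as an extra cmake argument. The blank line added here
# terminates the continuation, restoring ninja as a separate command.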
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL -DUNBUNDLED=1 \ `# Disable all features` \ -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ + ninja clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ From a04177290bac079737712583b9b54937ebe16705 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 21 Aug 2018 19:40:54 +0300 Subject: [PATCH 050/192] Miscellaneous #2482 --- dbms/programs/server/Server.cpp | 6 +++--- dbms/src/Common/TaskStatsInfoGetter.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 3a55add70af..f7d66269aea 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -368,9 +368,9 @@ int Server::main(const std::vector & /*args*/) if (!TaskStatsInfoGetter::checkProcessHasRequiredPermissions()) { - LOG_INFO(log, "It looks like the process has not CAP_NET_ADMIN capability, some performance statistics will be disabled." - " It could happen due to incorrect clickhouse package installation." - " You could resolve the problem manually calling 'sudo setcap cap_net_admin=+ep /usr/bin/clickhouse'"); + LOG_INFO(log, "It looks like the process has no CAP_NET_ADMIN capability, some performance statistics will be disabled." + " It could happen due to incorrect ClickHouse package installation." + " You could resolve the problem manually with 'sudo setcap cap_net_admin=+ep /usr/bin/clickhouse'"); } { diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 204b86750b4..c0ad1a5f951 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -146,7 +146,7 @@ void TaskStatsInfoGetter::init() netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) throwFromErrno("Can't create PF_NETLINK socket"); - + if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) throwFromErrno("Can't set timeout on PF_NETLINK socket"); From f68c322dca528290c6bd9df05af8802ae2f5528e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 21 Aug 2018 19:49:20 +0300 Subject: [PATCH 051/192] Added error codes #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index c0ad1a5f951..339016a2bcc 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -145,16 +145,16 @@ void TaskStatsInfoGetter::init() netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) - throwFromErrno("Can't create PF_NETLINK socket"); + throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) - throwFromErrno("Can't set timeout on PF_NETLINK socket"); + throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); ::sockaddr_nl addr{}; addr.nl_family = AF_NETLINK; if (::bind(netlink_socket_fd, reinterpret_cast(&addr), sizeof(addr)) < 0) - throwFromErrno("Can't bind PF_NETLINK socket"); + throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); netlink_family_id = 
getFamilyId(netlink_socket_fd); } @@ -164,7 +164,7 @@ bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool thr init(); if (sendCommand(netlink_socket_fd, netlink_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t))) - throwFromErrno("Can't send a Netlink command"); + throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); NetlinkMessage msg; ssize_t rv = ::recv(netlink_socket_fd, &msg, sizeof(msg), 0); From 51df4222db36b413acf3055144c6b4e3d821c75d Mon Sep 17 00:00:00 2001 From: champtar Date: Tue, 21 Aug 2018 13:15:18 -0400 Subject: [PATCH 052/192] Fix x86_64 detection on CentOS7 / Fedora (#2912) * Fix x86_64 detection on CentOS7 / Fedora CMAKE_LIBRARY_ARCHITECTURE is empty on CentOS7 / Fedora CMAKE_SYSTEM_PROCESSOR is what we really want only tested on CentOS7 / Fedora 28 Signed-off-by: Etienne Champetier * Support freebsd --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2ae51712f6a..bec91ce705a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -78,7 +78,7 @@ if (USE_STATIC_LIBRARIES) list(REVERSE CMAKE_FIND_LIBRARY_SUFFIXES) endif () -if (CMAKE_LIBRARY_ARCHITECTURE MATCHES "amd64.*|x86_64.*|AMD64.*") +if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) if (OS_LINUX) From 300457da7acc6af4c9bfe3feab724ec4e9d02877 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 21 Aug 2018 20:25:00 +0300 Subject: [PATCH 053/192] fix --- dbms/src/IO/AIO.h | 2 ++ utils/check_include.sh | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index e21a316a770..3717f9ab90d 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -2,6 +2,8 @@ #if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#include + /// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h #define timespec linux_timespec #define timeval linux_timeval diff --git a/utils/check_include.sh b/utils/check_include.sh index 2a7addb1876..fa207d482fc 100755 --- a/utils/check_include.sh +++ b/utils/check_include.sh @@ -14,6 +14,7 @@ inc="-I. 
\ -I./contrib/libmetrohash/src \ -I./contrib/double-conversion \ -I./contrib/cityhash102/include \ +-I./contrib/murmurhash/include \ -I./contrib/zookeeper/src/c/include \ -I./contrib/zookeeper/src/c/generated \ -I./contrib/libtcmalloc/include \ @@ -49,5 +50,5 @@ if [ -z $1 ]; then else echo -n "$1 " echo -n `grep "#include" $1| wc -l` " " - echo -e "#include <$1> \n int main() {return 0;}" | time --format "%e %M" ${CXX:=g++-7} -c -std=c++1z $inc -x c++ - + echo "#include <$1> \n int main() {return 0;}" | time --format "%e %M" ${CXX:=g++-7} -c -std=c++1z $inc -x c++ - fi From 3a00be1aab90f8b57a1ddaae75e2c7445206bc10 Mon Sep 17 00:00:00 2001 From: proller Date: Tue, 21 Aug 2018 21:35:31 +0300 Subject: [PATCH 054/192] fix --- libs/libdaemon/include/daemon/OwnFormattingChannel.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libs/libdaemon/include/daemon/OwnFormattingChannel.h b/libs/libdaemon/include/daemon/OwnFormattingChannel.h index e206debb8e5..52045606c14 100644 --- a/libs/libdaemon/include/daemon/OwnFormattingChannel.h +++ b/libs/libdaemon/include/daemon/OwnFormattingChannel.h @@ -3,9 +3,7 @@ #include #include #include - - -class OwnPatternFormatter; +#include "OwnPatternFormatter.h" namespace DB From 99d7b8a7ad986ba47a95d06656d38ed4f74de240 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 21 Aug 2018 21:42:46 +0300 Subject: [PATCH 055/192] Allow to quit client while query is in progress by pressing Ctrl+C twice #2877 --- dbms/programs/client/Client.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 5227edcafe6..a94e6865730 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -1527,7 +1527,9 @@ public: /// This is signal safe. ssize_t res = write(STDOUT_FILENO, "\n", 1); - if (res == 1 && rl_line_buffer[0]) + /// Allow to quit client while query is in progress by pressing Ctrl+C twice. + /// (First press to Ctrl+C will try to cancel query by InterruptListener). + if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE)) { rl_replace_line("", 0); if (rl_forced_update_display()) From 3fdff1abea94fc24d4c22e4ce9573fbf64606a68 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 00:05:30 +0300 Subject: [PATCH 056/192] TaskStats: better code #2482 --- dbms/programs/server/Server.cpp | 2 +- dbms/src/Common/TaskStatsInfoGetter.cpp | 49 ++++++++++++++----------- dbms/src/Common/TaskStatsInfoGetter.h | 2 +- dbms/src/Common/ThreadStatus.cpp | 2 +- 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index f7d66269aea..b22ab82559d 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -366,7 +366,7 @@ int Server::main(const std::vector & /*args*/) dns_cache_updater = std::make_unique(*global_context); } - if (!TaskStatsInfoGetter::checkProcessHasRequiredPermissions()) + if (!TaskStatsInfoGetter::checkPermissions()) { LOG_INFO(log, "It looks like the process has no CAP_NET_ADMIN capability, some performance statistics will be disabled." " It could happen due to incorrect ClickHouse package installation." diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 339016a2bcc..2662a60cae3 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -14,6 +14,7 @@ #include #include + /// Basic idea is motivated by "iotop" tool. 
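/// As of this commit sendCommand() and getFamilyId() report failures by throwing
/// Exception(..., ErrorCodes::NETLINK_ERROR) instead of returning error codes, which is
/// why the call sites below no longer check integer return values. (The leftover
/// noexcept specifiers on these functions are removed in the two follow-up commits.)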
/// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt @@ -46,7 +47,7 @@ struct NetlinkMessage }; -int sendCommand( +void sendCommand( int sock_fd, UInt16 nlmsg_type, UInt32 nlmsg_pid, @@ -91,10 +92,8 @@ int sendCommand( buflen -= r; } else if (errno != EAGAIN) - return -1; + throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); } - - return 0; } @@ -109,16 +108,19 @@ UInt16 getFamilyId(int nl_sock_fd) noexcept static char name[] = TASKSTATS_GENL_NAME; - if (sendCommand( + sendCommand( nl_sock_fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, (void *) name, - strlen(TASKSTATS_GENL_NAME) + 1)) - return 0; + strlen(TASKSTATS_GENL_NAME) + 1); UInt16 id = 0; ssize_t rep_len = ::recv(nl_sock_fd, &answer, sizeof(answer), 0); - if (answer.header.nlmsg_type == NLMSG_ERROR || (rep_len < 0) || !NLMSG_OK((&answer.header), rep_len)) - return 0; + if (rep_len < 0) + throwFromErrno("Cannot get the family id for " + std::string(TASKSTATS_GENL_NAME) + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); + + if (answer.header.nlmsg_type == NLMSG_ERROR ||!NLMSG_OK((&answer.header), rep_len)) + throw Exception("Received an error instead of the family id for " + std::string(TASKSTATS_GENL_NAME) + + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); const ::nlattr * attr; attr = static_cast(GENLMSG_DATA(&answer)); @@ -134,19 +136,20 @@ UInt16 getFamilyId(int nl_sock_fd) noexcept TaskStatsInfoGetter::TaskStatsInfoGetter() = default; + void TaskStatsInfoGetter::init() { if (netlink_socket_fd >= 0) return; - struct timeval tv; - tv.tv_sec = 0; - tv.tv_usec = 50000; - netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + struct timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 50000; + if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); @@ -159,12 +162,12 @@ void TaskStatsInfoGetter::init() netlink_family_id = getFamilyId(netlink_socket_fd); } + bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool throw_on_error) { init(); - if (sendCommand(netlink_socket_fd, netlink_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t))) - throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); + sendCommand(netlink_socket_fd, netlink_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t)); NetlinkMessage msg; ssize_t rv = ::recv(netlink_socket_fd, &msg, sizeof(msg), 0); @@ -212,6 +215,7 @@ bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool thr return true; } + void TaskStatsInfoGetter::getStat(::taskstats & stat, int tid) { tid = tid < 0 ? getDefaultTID() : tid; @@ -224,12 +228,6 @@ bool TaskStatsInfoGetter::tryGetStat(::taskstats & stat, int tid) return getStatImpl(tid, stat, false); } -TaskStatsInfoGetter::~TaskStatsInfoGetter() -{ - if (netlink_socket_fd >= 0) - close(netlink_socket_fd); -} - int TaskStatsInfoGetter::getCurrentTID() { /// This call is always successful. 
- man gettid @@ -251,11 +249,18 @@ static bool tryGetTaskStats() return getter.tryGetStat(stat); } -bool TaskStatsInfoGetter::checkProcessHasRequiredPermissions() +bool TaskStatsInfoGetter::checkPermissions() { /// It is thread- and exception- safe since C++11 static bool res = tryGetTaskStats(); return res; } + +TaskStatsInfoGetter::~TaskStatsInfoGetter() +{ + if (netlink_socket_fd >= 0) + close(netlink_socket_fd); +} + } diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index c89194cf88a..467422817f9 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -26,7 +26,7 @@ public: static int getCurrentTID(); /// Whether the current process has permissions (sudo or cap_net_admin capabilties) to get taskstats info - static bool checkProcessHasRequiredPermissions(); + static bool checkPermissions(); private: /// Caches current thread tid to avoid extra sys calls diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index d4cca1b326c..d4a47403a2b 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -70,7 +70,7 @@ void ThreadStatus::initPerformanceCounters() ++queries_started; *last_rusage = RUsageCounters::current(query_start_time_nanoseconds); - has_permissions_for_taskstats = TaskStatsInfoGetter::checkProcessHasRequiredPermissions(); + has_permissions_for_taskstats = TaskStatsInfoGetter::checkPermissions(); if (has_permissions_for_taskstats) *last_taskstats = TasksStatsCounters::current(); } From aea01a4e0d6cb3b9f797e4e317a5a73ac70e07fc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 00:07:06 +0300 Subject: [PATCH 057/192] TaskStats: better code #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 2662a60cae3..c0dadfdb64a 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -54,7 +54,7 @@ void sendCommand( UInt8 genl_cmd, UInt16 nla_type, void * nla_data, - int nla_len) noexcept + int nla_len) { NetlinkMessage msg{}; From ea98d79e567c56e56836b0f667a2f48c605cc3be Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 00:07:37 +0300 Subject: [PATCH 058/192] TaskStats: better code #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index c0dadfdb64a..2fb31a481d4 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -97,7 +97,7 @@ void sendCommand( } -UInt16 getFamilyId(int nl_sock_fd) noexcept +UInt16 getFamilyId(int nl_sock_fd) { struct { From e0bf8c757b019a80060a2949ffe030bc193dd5d6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 02:52:03 +0300 Subject: [PATCH 059/192] Checking for capabilities in more direct way #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 2fb31a481d4..c5bee1daff3 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -242,17 +243,27 @@ int TaskStatsInfoGetter::getDefaultTID() return 
default_tid; } -static bool tryGetTaskStats() + +static bool checkPermissionsImpl() { - TaskStatsInfoGetter getter; - ::taskstats stat; - return getter.tryGetStat(stat); + /// See man getcap. + __user_cap_header_struct request{}; + request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just single CAP_NET_ADMIN capability we are interested. + request.pid = getpid(); + + __user_cap_data_struct response{}; + + /// Avoid dependency on 'libcap'. + if (0 != syscall(SYS_capget, &request, &response)) + throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR); + + return (1 << CAP_NET_ADMIN) & response.effective; } bool TaskStatsInfoGetter::checkPermissions() { /// It is thread- and exception- safe since C++11 - static bool res = tryGetTaskStats(); + static bool res = checkPermissionsImpl(); return res; } From b2b26f6e54cb7b1d6d26d322a42d6a8a13d9c183 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 02:56:55 +0300 Subject: [PATCH 060/192] Removed useless method #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 16 +++------------- dbms/src/Common/TaskStatsInfoGetter.h | 4 ++-- dbms/src/Interpreters/tests/internal_iotop.cpp | 13 +++---------- 3 files changed, 8 insertions(+), 25 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index c5bee1daff3..22523ae8849 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -164,7 +164,7 @@ void TaskStatsInfoGetter::init() } -bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool throw_on_error) +void TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats) { init(); @@ -176,10 +176,7 @@ bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool thr if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), rv)) { const ::nlmsgerr * err = static_cast(NLMSG_DATA(&msg)); - if (throw_on_error) - throw Exception("Can't get Netlink response, error: " + std::to_string(err->error), ErrorCodes::NETLINK_ERROR); - else - return false; + throw Exception("Can't get Netlink response, error: " + std::to_string(err->error), ErrorCodes::NETLINK_ERROR); } rv = GENLMSG_PAYLOAD(&msg.n); @@ -212,22 +209,15 @@ bool TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats, bool thr attr = reinterpret_cast(reinterpret_cast(GENLMSG_DATA(&msg)) + len); } - - return true; } void TaskStatsInfoGetter::getStat(::taskstats & stat, int tid) { tid = tid < 0 ? getDefaultTID() : tid; - getStatImpl(tid, stat, true); + getStatImpl(tid, stat); } -bool TaskStatsInfoGetter::tryGetStat(::taskstats & stat, int tid) -{ - tid = tid < 0 ? 
getDefaultTID() : tid; - return getStatImpl(tid, stat, false); -} int TaskStatsInfoGetter::getCurrentTID() { diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index 467422817f9..1fe1d325ba8 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -1,4 +1,5 @@ #pragma once + #include struct taskstats; @@ -18,7 +19,6 @@ public: TaskStatsInfoGetter(const TaskStatsInfoGetter &) = delete; void getStat(::taskstats & stat, int tid = -1); - bool tryGetStat(::taskstats & stat, int tid = -1); ~TaskStatsInfoGetter(); @@ -33,7 +33,7 @@ private: int getDefaultTID(); int default_tid = -1; - bool getStatImpl(int tid, ::taskstats & out_stats, bool throw_on_error = false); + void getStatImpl(int tid, ::taskstats & out_stats); void init(); int netlink_socket_fd = -1; diff --git a/dbms/src/Interpreters/tests/internal_iotop.cpp b/dbms/src/Interpreters/tests/internal_iotop.cpp index 536a2b3d0ae..2983cac5956 100644 --- a/dbms/src/Interpreters/tests/internal_iotop.cpp +++ b/dbms/src/Interpreters/tests/internal_iotop.cpp @@ -52,16 +52,9 @@ void do_io(size_t id) int tid = TaskStatsInfoGetter::getCurrentTID(); TaskStatsInfoGetter get_info; - if (!get_info.tryGetStat(stat, tid)) - { - std::lock_guard lock(mutex); - std::cerr << "#" << id << ", tid " << tid << ". Can't get stat\n"; - } - else - { - std::lock_guard lock(mutex); - std::cerr << "#" << id << ", tid " << tid << ", intitial\n" << stat << "\n"; - } + get_info.getStat(stat, tid)) + std::lock_guard lock(mutex); + std::cerr << "#" << id << ", tid " << tid << ", intitial\n" << stat << "\n"; size_t copy_size = 1048576 * (1 + id); std::string path_dst = "test_out_" + std::to_string(id); From 6622b6947ab5f2c1fbd9ede1d153cf4a49af5675 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 22 Aug 2018 02:57:59 +0300 Subject: [PATCH 061/192] wip --- cmake/find_boost.cmake | 4 ++-- contrib/boost-cmake/CMakeLists.txt | 2 +- libs/libcommon/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/find_boost.cmake b/cmake/find_boost.cmake index a99211efe08..5a024d4d3db 100644 --- a/cmake/find_boost.cmake +++ b/cmake/find_boost.cmake @@ -26,9 +26,9 @@ endif () if (NOT Boost_SYSTEM_LIBRARY) set (USE_INTERNAL_BOOST_LIBRARY 1) - set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal) set (Boost_SYSTEM_LIBRARY boost_system_internal) - set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal) + set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal) + set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY}) set (Boost_INCLUDE_DIRS) diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 493a28d1338..d34fcbe3e40 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -42,7 +42,7 @@ ${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp) add_library(boost_system_internal ${SPLIT_SHARED} ${LIBRARY_DIR}/libs/system/src/error_code.cpp) -target_link_libraries (boost_filesystem_internal PRIVATE boost_system_internal) +target_link_libraries (boost_filesystem_internal PUBLIC boost_system_internal) target_include_directories (boost_program_options_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) target_include_directories (boost_filesystem_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS}) diff --git a/libs/libcommon/CMakeLists.txt b/libs/libcommon/CMakeLists.txt index 99b02ec1f26..e327dfb102c 100644 --- a/libs/libcommon/CMakeLists.txt +++ 
b/libs/libcommon/CMakeLists.txt @@ -104,8 +104,8 @@ target_link_libraries ( pocoext ${CITYHASH_LIBRARIES} ${CCTZ_LIBRARY} - ${Boost_SYSTEM_LIBRARY} ${Boost_FILESYSTEM_LIBRARY} + ${Boost_SYSTEM_LIBRARY} ${MALLOC_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${GLIBC_COMPATIBILITY_LIBRARIES} From 509b43328e5fb6a3cfda5c73525f8d4f92c517b9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 02:58:12 +0300 Subject: [PATCH 062/192] Removed useless method #2482 --- dbms/src/Interpreters/tests/internal_iotop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/tests/internal_iotop.cpp b/dbms/src/Interpreters/tests/internal_iotop.cpp index 2983cac5956..aadaf7127a2 100644 --- a/dbms/src/Interpreters/tests/internal_iotop.cpp +++ b/dbms/src/Interpreters/tests/internal_iotop.cpp @@ -52,7 +52,7 @@ void do_io(size_t id) int tid = TaskStatsInfoGetter::getCurrentTID(); TaskStatsInfoGetter get_info; - get_info.getStat(stat, tid)) + get_info.getStat(stat, tid); std::lock_guard lock(mutex); std::cerr << "#" << id << ", tid " << tid << ", intitial\n" << stat << "\n"; From 44e4f9cd2640a782554285fd814fc3199a13107c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 03:01:15 +0300 Subject: [PATCH 063/192] Removed useless code #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 17 +---------------- dbms/src/Common/TaskStatsInfoGetter.h | 7 +------ 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 22523ae8849..e5c56d15743 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -164,7 +164,7 @@ void TaskStatsInfoGetter::init() } -void TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats) +void TaskStatsInfoGetter::getStat(::taskstats & out_stats, int tid) { init(); @@ -212,27 +212,12 @@ void TaskStatsInfoGetter::getStatImpl(int tid, ::taskstats & out_stats) } -void TaskStatsInfoGetter::getStat(::taskstats & stat, int tid) -{ - tid = tid < 0 ? getDefaultTID() : tid; - getStatImpl(tid, stat); -} - - int TaskStatsInfoGetter::getCurrentTID() { /// This call is always successful. 
- man gettid return static_cast(syscall(SYS_gettid)); } -int TaskStatsInfoGetter::getDefaultTID() -{ - if (default_tid < 0) - default_tid = getCurrentTID(); - - return default_tid; -} - static bool checkPermissionsImpl() { diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index 1fe1d325ba8..721ed4b02c4 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -18,7 +18,7 @@ public: TaskStatsInfoGetter(); TaskStatsInfoGetter(const TaskStatsInfoGetter &) = delete; - void getStat(::taskstats & stat, int tid = -1); + void getStat(::taskstats & stat, int tid); ~TaskStatsInfoGetter(); @@ -29,11 +29,6 @@ public: static bool checkPermissions(); private: - /// Caches current thread tid to avoid extra sys calls - int getDefaultTID(); - int default_tid = -1; - - void getStatImpl(int tid, ::taskstats & out_stats); void init(); int netlink_socket_fd = -1; From 698761020ff947269312987d014c93f81bad8bd4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 03:05:06 +0300 Subject: [PATCH 064/192] Code cleanups #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 9 +++------ dbms/src/Common/TaskStatsInfoGetter.h | 14 +++++--------- 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index e5c56d15743..fe96762cb77 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -135,9 +135,6 @@ UInt16 getFamilyId(int nl_sock_fd) } -TaskStatsInfoGetter::TaskStatsInfoGetter() = default; - - void TaskStatsInfoGetter::init() { if (netlink_socket_fd >= 0) @@ -164,7 +161,7 @@ void TaskStatsInfoGetter::init() } -void TaskStatsInfoGetter::getStat(::taskstats & out_stats, int tid) +void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) { init(); @@ -212,10 +209,10 @@ void TaskStatsInfoGetter::getStat(::taskstats & out_stats, int tid) } -int TaskStatsInfoGetter::getCurrentTID() +pid_t TaskStatsInfoGetter::getCurrentTID() { /// This call is always successful. - man gettid - return static_cast(syscall(SYS_gettid)); + return static_cast(syscall(SYS_gettid)); } diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index 721ed4b02c4..8ed18a4988d 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -1,6 +1,8 @@ #pragma once +#include #include +#include struct taskstats; @@ -8,22 +10,16 @@ struct taskstats; namespace DB { -class Exception; - - /// Get taskstat info from OS kernel via Netlink protocol. 
-class TaskStatsInfoGetter +class TaskStatsInfoGetter : private boost::noncopyable { public: - TaskStatsInfoGetter(); - TaskStatsInfoGetter(const TaskStatsInfoGetter &) = delete; - - void getStat(::taskstats & stat, int tid); + void getStat(::taskstats & stat, pid_t tid); ~TaskStatsInfoGetter(); /// Make a syscall and returns Linux thread id - static int getCurrentTID(); + static pid_t getCurrentTID(); /// Whether the current process has permissions (sudo or cap_net_admin capabilties) to get taskstats info static bool checkPermissions(); From 43f1f70c9bffc03f8774a01cf9e0e9a9baa40c2f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 03:24:55 +0300 Subject: [PATCH 065/192] Code cleanups #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 73 +++++++++++--------- dbms/src/Common/TaskStatsInfoGetter.h | 7 +- dbms/src/Common/ThreadProfileEvents.h | 1 - dbms/src/Common/ThreadStatus.cpp | 9 ++- dbms/src/IO/ReadBufferFromFileDescriptor.cpp | 5 +- 5 files changed, 54 insertions(+), 41 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index fe96762cb77..97ba2d5e54a 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -98,7 +98,7 @@ void sendCommand( } -UInt16 getFamilyId(int nl_sock_fd) +UInt16 getFamilyIdImpl(int netlink_socket_fd) { struct { @@ -110,12 +110,12 @@ UInt16 getFamilyId(int nl_sock_fd) static char name[] = TASKSTATS_GENL_NAME; sendCommand( - nl_sock_fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, + netlink_socket_fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, (void *) name, strlen(TASKSTATS_GENL_NAME) + 1); UInt16 id = 0; - ssize_t rep_len = ::recv(nl_sock_fd, &answer, sizeof(answer), 0); + ssize_t rep_len = ::recv(netlink_socket_fd, &answer, sizeof(answer), 0); if (rep_len < 0) throwFromErrno("Cannot get the family id for " + std::string(TASKSTATS_GENL_NAME) + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); @@ -132,12 +132,44 @@ UInt16 getFamilyId(int nl_sock_fd) return id; } + +bool checkPermissionsImpl() +{ + /// See man getcap. + __user_cap_header_struct request{}; + request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just single CAP_NET_ADMIN capability we are interested. + request.pid = getpid(); + + __user_cap_data_struct response{}; + + /// Avoid dependency on 'libcap'. + if (0 != syscall(SYS_capget, &request, &response)) + throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR); + + return (1 << CAP_NET_ADMIN) & response.effective; } -void TaskStatsInfoGetter::init() +UInt16 getFamilyId(int netlink_socket_fd) { - if (netlink_socket_fd >= 0) + /// It is thread and exception safe since C++11 and even before. 
+ static UInt16 res = getFamilyIdImpl(netlink_socket_fd); + return res; +} + +} + + +bool TaskStatsInfoGetter::checkPermissions() +{ + static bool res = checkPermissionsImpl(); + return res; +} + + +TaskStatsInfoGetter::TaskStatsInfoGetter() +{ + if (!checkPermissions()) return; netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); @@ -157,15 +189,16 @@ void TaskStatsInfoGetter::init() if (::bind(netlink_socket_fd, reinterpret_cast(&addr), sizeof(addr)) < 0) throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); - netlink_family_id = getFamilyId(netlink_socket_fd); + taskstats_family_id = getFamilyId(netlink_socket_fd); } void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) { - init(); + if (!checkPermissions()) + return; - sendCommand(netlink_socket_fd, netlink_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t)); + sendCommand(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t)); NetlinkMessage msg; ssize_t rv = ::recv(netlink_socket_fd, &msg, sizeof(msg), 0); @@ -216,30 +249,6 @@ pid_t TaskStatsInfoGetter::getCurrentTID() } -static bool checkPermissionsImpl() -{ - /// See man getcap. - __user_cap_header_struct request{}; - request.version = _LINUX_CAPABILITY_VERSION_1; /// It's enough to check just single CAP_NET_ADMIN capability we are interested. - request.pid = getpid(); - - __user_cap_data_struct response{}; - - /// Avoid dependency on 'libcap'. - if (0 != syscall(SYS_capget, &request, &response)) - throwFromErrno("Cannot do 'capget' syscall", ErrorCodes::NETLINK_ERROR); - - return (1 << CAP_NET_ADMIN) & response.effective; -} - -bool TaskStatsInfoGetter::checkPermissions() -{ - /// It is thread- and exception- safe since C++11 - static bool res = checkPermissionsImpl(); - return res; -} - - TaskStatsInfoGetter::~TaskStatsInfoGetter() { if (netlink_socket_fd >= 0) diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index 8ed18a4988d..f086a884138 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -14,10 +14,11 @@ namespace DB class TaskStatsInfoGetter : private boost::noncopyable { public: - void getStat(::taskstats & stat, pid_t tid); - + TaskStatsInfoGetter(); ~TaskStatsInfoGetter(); + void getStat(::taskstats & stat, pid_t tid); + /// Make a syscall and returns Linux thread id static pid_t getCurrentTID(); @@ -28,7 +29,7 @@ private: void init(); int netlink_socket_fd = -1; - UInt16 netlink_family_id = 0; + UInt16 taskstats_family_id = 0; }; } diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 3a780f509a7..1afcdf98ccc 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -1,5 +1,4 @@ #pragma once -#include #include #include diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index d4a47403a2b..dff33d786ee 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -1,10 +1,13 @@ -#include "ThreadStatus.h" -#include +#include + +#include #include #include #include +#include +#include -#include +#include #include diff --git a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp index 9bacf699cc8..70cc84567f3 100644 --- a/dbms/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/dbms/src/IO/ReadBufferFromFileDescriptor.cpp @@ -67,8 +67,9 @@ bool ReadBufferFromFileDescriptor::nextImpl() if 
(res > 0) bytes_read += res; - /// NOTE: it is quite inaccurate on high loads since the thread could be replaced by another one and we will count cpu time of other thread - /// It is better to use taskstats::blkio_delay_total, but it is quite expensive to get it (TaskStatsInfoGetter has about 500K RPS) + /// It reports real time spent including the time spent while thread was preempted doing nothing. + /// And it is Ok for the purpose of this watch (it is used to lower the number of threads to read from tables). + /// Sometimes it is better to use taskstats::blkio_delay_total, but it is quite expensive to get it (TaskStatsInfoGetter has about 500K RPS). watch.stop(); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); From efca6e4e0979594cf75d23daa5c0409c3612d77c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 03:41:30 +0300 Subject: [PATCH 066/192] Code cleanups #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 7 +++---- dbms/src/Common/ThreadStatus.cpp | 10 ++++++---- dbms/src/Common/ThreadStatus.h | 5 ++--- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 97ba2d5e54a..e09ec3afee5 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -31,6 +31,7 @@ namespace DB namespace ErrorCodes { extern const int NETLINK_ERROR; + extern const int LOGICAL_ERROR; } @@ -170,7 +171,8 @@ bool TaskStatsInfoGetter::checkPermissions() TaskStatsInfoGetter::TaskStatsInfoGetter() { if (!checkPermissions()) - return; + throw Exception("Logical error: TaskStatsInfoGetter is not usable without CAP_NET_ADMIN. Check permissions before creating the object.", + ErrorCodes::LOGICAL_ERROR); netlink_socket_fd = ::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC); if (netlink_socket_fd < 0) @@ -195,9 +197,6 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) { - if (!checkPermissions()) - return; - sendCommand(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t)); NetlinkMessage msg; diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index dff33d786ee..0ed242218f5 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -42,7 +42,6 @@ ThreadStatus::ThreadStatus() last_rusage = std::make_unique(); last_taskstats = std::make_unique(); - taskstats_getter = std::make_unique(); memory_tracker.setDescription("(for thread)"); log = &Poco::Logger::get("ThreadStatus"); @@ -73,9 +72,12 @@ void ThreadStatus::initPerformanceCounters() ++queries_started; *last_rusage = RUsageCounters::current(query_start_time_nanoseconds); - has_permissions_for_taskstats = TaskStatsInfoGetter::checkPermissions(); - if (has_permissions_for_taskstats) + + if (TaskStatsInfoGetter::checkPermissions()) + { + taskstats_getter = std::make_unique(); *last_taskstats = TasksStatsCounters::current(); + } } void ThreadStatus::updatePerformanceCounters() @@ -83,7 +85,7 @@ void ThreadStatus::updatePerformanceCounters() try { RUsageCounters::updateProfileEvents(*last_rusage, performance_counters); - if (has_permissions_for_taskstats) + if (taskstats_getter) TasksStatsCounters::updateProfileEvents(*last_taskstats, performance_counters); } catch (...) 
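
Note: the capability probe these patches rely on (checkPermissionsImpl(), introduced in patch 059 and relocated in patch 065) can be exercised in isolation. The sketch below mirrors that SYS_capget call; the standalone main() harness, the simplified error handling (returning false instead of throwing), and the helper name hasNetAdminCapability are illustrative additions, not code from this series.

    #include <linux/capability.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <cstdio>

    /// Ask the kernel for our effective capability set without linking libcap.
    static bool hasNetAdminCapability()
    {
        __user_cap_header_struct request{};
        request.version = _LINUX_CAPABILITY_VERSION_1; /// V1 suffices: CAP_NET_ADMIN fits into the first 32-bit word.
        request.pid = ::getpid();

        __user_cap_data_struct response{};

        if (0 != ::syscall(SYS_capget, &request, &response))
            return false; /// The patched code throws here; simplified for the sketch.

        return response.effective & (1 << CAP_NET_ADMIN);
    }

    int main()
    {
        std::printf("CAP_NET_ADMIN is %s\n", hasNetAdminCapability() ? "present" : "absent");
        return 0;
    }

Run without the cap_net_admin capability, it prints "absent", which is the situation the server startup message added later in this series warns about.
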
diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index b708b3dce03..b550f416558 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -126,7 +126,6 @@ public: ~ThreadStatus(); protected: - ThreadStatus(); void initPerformanceCounters(); @@ -160,11 +159,11 @@ protected: /// Use ptr not to add extra dependencies in the header std::unique_ptr last_rusage; std::unique_ptr last_taskstats; + + /// Set only if we have enough capabilities. std::unique_ptr taskstats_getter; - bool has_permissions_for_taskstats = false; public: - /// Implicitly finalizes current thread in the destructor class CurrentThreadScope { From d4b038b270da098e3ee65972ef117d510b4711b4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 05:54:24 +0300 Subject: [PATCH 067/192] TaskStats: rewrite code #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 248 +++++++++++++----------- 1 file changed, 138 insertions(+), 110 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index e09ec3afee5..02c539f5528 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -2,6 +2,8 @@ #include #include +#include + #include #include #include @@ -19,11 +21,6 @@ /// Basic idea is motivated by "iotop" tool. /// More info: https://www.kernel.org/doc/Documentation/accounting/taskstats.txt -#define GENLMSG_DATA(glh) ((void *)((char*)NLMSG_DATA(glh) + GENL_HDRLEN)) -#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) -#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN)) -#define NLA_PAYLOAD(len) (len - NLA_HDRLEN) - namespace DB { @@ -38,99 +35,146 @@ namespace ErrorCodes namespace { -static size_t constexpr MAX_MSG_SIZE = 1024; - +/** The message contains: + * - Netlink protocol header; + * - Generic Netlink (is a sub-protocol of Netlink that we use) protocol header; + * - Payload + * -- that itself is a list of "Attributes" (sub-messages), each of them contains length (including header), type, and its own payload. + * -- and attribute payload may be represented by the list of embedded attributes. + */ struct NetlinkMessage { - ::nlmsghdr n; - ::genlmsghdr g; - char buf[MAX_MSG_SIZE]; + alignas(NLMSG_ALIGNTO) ::nlmsghdr header; + + struct Attribute + { + ::nlattr header; + + alignas(NLMSG_ALIGNTO) char payload[0]; + + const Attribute * next() const + { + return reinterpret_cast(reinterpret_cast(this) + NLA_ALIGN(header.nla_len)); + } + }; + + alignas(NLMSG_ALIGNTO) union + { + struct + { + ::genlmsghdr generic_header; + + alignas(NLMSG_ALIGNTO) union + { + static size_t constexpr MAX_MSG_SIZE = 1024; + + char buf[MAX_MSG_SIZE]; + Attribute attribute; /// First attribute. There may be more. 
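+                /// Note: buf and attribute overlay the same bytes; buf merely reserves MAX_MSG_SIZE bytes for the whole attribute list.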
+ } payload; + }; + + ::nlmsgerr error; + }; + + size_t payload_size() const + { + return header.nlmsg_len - sizeof(header) - sizeof(generic_header); + } + + const Attribute * end() const + { + return reinterpret_cast(reinterpret_cast(this) + header.nlmsg_len); + } + + void send(int fd) const + { + const char * request_buf = reinterpret_cast(this); + ssize_t request_size = header.nlmsg_len; + + ::sockaddr_nl nladdr{}; + nladdr.nl_family = AF_NETLINK; + + while (true) + { + ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, reinterpret_cast(&nladdr), sizeof(nladdr)); + + if (bytes_sent <= 0) + { + if (errno == EAGAIN) + continue; + else + throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); + } + + if (bytes_sent > request_size) + throw Exception("Wrong result of sendto system call: bytes_sent is greater than request size", ErrorCodes::NETLINK_ERROR); + + if (bytes_sent == request_size) + break; + + request_buf += bytes_sent; + request_size -= bytes_sent; + } + } + + void receive(int fd) + { + ssize_t bytes_received = ::recv(fd, this, sizeof(*this), 0); + + if (header.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&header), bytes_received)) + throw Exception("Can't receive Netlink response, error: " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR); + } }; -void sendCommand( - int sock_fd, - UInt16 nlmsg_type, - UInt32 nlmsg_pid, - UInt8 genl_cmd, - UInt16 nla_type, - void * nla_data, - int nla_len) +NetlinkMessage query( + int fd, + UInt16 type, + UInt32 pid, + UInt8 command, + UInt16 attribute_type, + const void * attribute_data, + int attribute_size) { - NetlinkMessage msg{}; + NetlinkMessage request; - msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); - msg.n.nlmsg_type = nlmsg_type; - msg.n.nlmsg_flags = NLM_F_REQUEST; - msg.n.nlmsg_seq = 0; - msg.n.nlmsg_pid = nlmsg_pid; - msg.g.cmd = genl_cmd; - msg.g.version = 1; + request.header.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); /// Length of both headers. + request.header.nlmsg_type = type; + request.header.nlmsg_flags = NLM_F_REQUEST; /// A request. 
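+    /// The sequence number stays zero: one request is sent and one reply is read per query, so there is nothing to match.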
+ request.header.nlmsg_seq = 0; + request.header.nlmsg_pid = pid; - ::nlattr * attr = static_cast<::nlattr *>(GENLMSG_DATA(&msg)); - attr->nla_type = nla_type; - attr->nla_len = nla_len + 1 + NLA_HDRLEN; + request.generic_header.cmd = command; + request.generic_header.version = 1; - memcpy(NLA_DATA(attr), nla_data, nla_len); - msg.n.nlmsg_len += NLMSG_ALIGN(attr->nla_len); + request.payload.attribute.header.nla_type = attribute_type; + request.payload.attribute.header.nla_len = attribute_size + 1 + NLA_HDRLEN; - char * buf = reinterpret_cast(&msg); - ssize_t buflen = msg.n.nlmsg_len; + memcpy(&request.payload.attribute.payload, attribute_data, attribute_size); - ::sockaddr_nl nladdr{}; - nladdr.nl_family = AF_NETLINK; + request.header.nlmsg_len += NLMSG_ALIGN(request.payload.attribute.header.nla_len); - while (true) - { - ssize_t r = ::sendto(sock_fd, buf, buflen, 0, reinterpret_cast(&nladdr), sizeof(nladdr)); + request.send(fd); - if (r >= buflen) - break; + NetlinkMessage response; + response.receive(fd); - if (r > 0) - { - buf += r; - buflen -= r; - } - else if (errno != EAGAIN) - throwFromErrno("Can't send a Netlink command", ErrorCodes::NETLINK_ERROR); - } + return response; } -UInt16 getFamilyIdImpl(int netlink_socket_fd) +UInt16 getFamilyIdImpl(int fd) { - struct - { - ::nlmsghdr header; - ::genlmsghdr ge_header; - char buf[256]; - } answer; + NetlinkMessage answer = query(fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, CTRL_ATTR_FAMILY_NAME, TASKSTATS_GENL_NAME, strlen(TASKSTATS_GENL_NAME) + 1); - static char name[] = TASKSTATS_GENL_NAME; + /// NOTE Why the relevant info is located in the second attribute? + const NetlinkMessage::Attribute * attr = answer.payload.attribute.next(); - sendCommand( - netlink_socket_fd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, - CTRL_ATTR_FAMILY_NAME, (void *) name, - strlen(TASKSTATS_GENL_NAME) + 1); + if (attr->header.nla_type != CTRL_ATTR_FAMILY_ID) + throw Exception("Received wrong attribute as an answer to GET_FAMILY Netlink command", ErrorCodes::NETLINK_ERROR); - UInt16 id = 0; - ssize_t rep_len = ::recv(netlink_socket_fd, &answer, sizeof(answer), 0); - if (rep_len < 0) - throwFromErrno("Cannot get the family id for " + std::string(TASKSTATS_GENL_NAME) + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); - - if (answer.header.nlmsg_type == NLMSG_ERROR ||!NLMSG_OK((&answer.header), rep_len)) - throw Exception("Received an error instead of the family id for " + std::string(TASKSTATS_GENL_NAME) - + " from the Netlink socket", ErrorCodes::NETLINK_ERROR); - - const ::nlattr * attr; - attr = static_cast(GENLMSG_DATA(&answer)); - attr = reinterpret_cast(reinterpret_cast(attr) + NLA_ALIGN(attr->nla_len)); - if (attr->nla_type == CTRL_ATTR_FAMILY_ID) - id = *static_cast(NLA_DATA(attr)); - - return id; + return unalignedLoad(attr->payload); } @@ -151,10 +195,10 @@ bool checkPermissionsImpl() } -UInt16 getFamilyId(int netlink_socket_fd) +UInt16 getFamilyId(int fd) { /// It is thread and exception safe since C++11 and even before. - static UInt16 res = getFamilyIdImpl(netlink_socket_fd); + static UInt16 res = getFamilyIdImpl(fd); return res; } @@ -178,6 +222,9 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() if (netlink_socket_fd < 0) throwFromErrno("Can't create PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); + /// On some containerized environments, operation on Netlink socket could hang forever. + /// We set reasonably small timeout to overcome this issue. 
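+    /// (50 ms, via the timeval below.)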
+ struct timeval tv; tv.tv_sec = 0; tv.tv_usec = 50000; @@ -197,47 +244,28 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() void TaskStatsInfoGetter::getStat(::taskstats & out_stats, pid_t tid) { - sendCommand(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(pid_t)); + NetlinkMessage answer = query(netlink_socket_fd, taskstats_family_id, tid, TASKSTATS_CMD_GET, TASKSTATS_CMD_ATTR_PID, &tid, sizeof(tid)); - NetlinkMessage msg; - ssize_t rv = ::recv(netlink_socket_fd, &msg, sizeof(msg), 0); - - if (msg.n.nlmsg_type == NLMSG_ERROR || !NLMSG_OK((&msg.n), rv)) + for (const NetlinkMessage::Attribute * attr = &answer.payload.attribute; + attr < answer.end(); + attr = attr->next()) { - const ::nlmsgerr * err = static_cast(NLMSG_DATA(&msg)); - throw Exception("Can't get Netlink response, error: " + std::to_string(err->error), ErrorCodes::NETLINK_ERROR); - } - - rv = GENLMSG_PAYLOAD(&msg.n); - - const ::nlattr * attr = static_cast(GENLMSG_DATA(&msg)); - ssize_t len = 0; - - while (len < rv) - { - len += NLA_ALIGN(attr->nla_len); - - if (attr->nla_type == TASKSTATS_TYPE_AGGR_TGID || attr->nla_type == TASKSTATS_TYPE_AGGR_PID) + if (attr->header.nla_type == TASKSTATS_TYPE_AGGR_TGID || attr->header.nla_type == TASKSTATS_TYPE_AGGR_PID) { - int aggr_len = NLA_PAYLOAD(attr->nla_len); - int len2 = 0; - - attr = static_cast(NLA_DATA(attr)); - while (len2 < aggr_len) + for (const NetlinkMessage::Attribute * nested_attr = reinterpret_cast(attr->payload); + nested_attr < attr->next(); + nested_attr = nested_attr->next()) { - if (attr->nla_type == TASKSTATS_TYPE_STATS) + if (nested_attr->header.nla_type == TASKSTATS_TYPE_STATS) { - const ::taskstats * ts = static_cast(NLA_DATA(attr)); - out_stats = *ts; + out_stats = unalignedLoad<::taskstats>(nested_attr->payload); + return; } - - len2 += NLA_ALIGN(attr->nla_len); - attr = reinterpret_cast(reinterpret_cast(attr) + len2); } } - - attr = reinterpret_cast(reinterpret_cast(GENLMSG_DATA(&msg)) + len); } + + throw Exception("There is no TASKSTATS_TYPE_STATS attribute in the Netlink response", ErrorCodes::NETLINK_ERROR); } From e220a5041f5319071e91d61df04c5b29b5652fe2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 06:06:13 +0300 Subject: [PATCH 068/192] Better location #2877 --- dbms/programs/client/Client.cpp | 60 +++++++++++++++++---------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index a94e6865730..f5a66e4dd15 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -482,6 +482,37 @@ private: Poco::File(history_file).createFile(); } +#if USE_READLINE + /// Install Ctrl+C signal handler that will be used in interactive mode. + + if (rl_initialize()) + throw Exception("Cannot initialize readline", ErrorCodes::CANNOT_READLINE); + + auto clear_prompt_or_exit = [](int) + { + /// This is signal safe. + ssize_t res = write(STDOUT_FILENO, "\n", 1); + + /// Allow to quit client while query is in progress by pressing Ctrl+C twice. + /// (First press to Ctrl+C will try to cancel query by InterruptListener). + if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE)) + { + rl_replace_line("", 0); + if (rl_forced_update_display()) + _exit(0); + } + else + { + /// A little dirty, but we struggle to find better way to correctly + /// force readline to exit after returning from the signal handler. 
+ _exit(0); + } + }; + + if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR) + throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); +#endif + loop(); std::cout << (isNewYearMode() ? "Happy new year." : "Bye.") << std::endl; @@ -1518,35 +1549,6 @@ public: } } -#if USE_READLINE - if (rl_initialize()) - throw Exception("Cannot initialize readline", ErrorCodes::CANNOT_READLINE); - - auto clear_prompt_or_exit = [](int) - { - /// This is signal safe. - ssize_t res = write(STDOUT_FILENO, "\n", 1); - - /// Allow to quit client while query is in progress by pressing Ctrl+C twice. - /// (First press to Ctrl+C will try to cancel query by InterruptListener). - if (res == 1 && rl_line_buffer[0] && !RL_ISSTATE(RL_STATE_DONE)) - { - rl_replace_line("", 0); - if (rl_forced_update_display()) - _exit(0); - } - else - { - /// A little dirty, but we struggle to find better way to correctly - /// force readline to exit after returning from the signal handler. - _exit(0); - } - }; - - if (signal(SIGINT, clear_prompt_or_exit) == SIG_ERR) - throwFromErrno("Cannot set signal handler.", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER); -#endif - ioctl(0, TIOCGWINSZ, &terminal_size); namespace po = boost::program_options; From f5326ed29aece2ce686c9f8eb14932f347ac046e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 06:11:46 +0300 Subject: [PATCH 069/192] TaskStats: addition to prev. revision #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 02c539f5528..438be6912e0 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -45,6 +45,8 @@ namespace */ struct NetlinkMessage { + static size_t constexpr MAX_MSG_SIZE = 1024; + alignas(NLMSG_ALIGNTO) ::nlmsghdr header; struct Attribute @@ -67,8 +69,6 @@ struct NetlinkMessage alignas(NLMSG_ALIGNTO) union { - static size_t constexpr MAX_MSG_SIZE = 1024; - char buf[MAX_MSG_SIZE]; Attribute attribute; /// First attribute. There may be more. } payload; From 8c76b8e87563a2de4ac5cec2d1912c44a749d778 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 06:12:55 +0300 Subject: [PATCH 070/192] TaskStats: addition to prev. revision #2482 --- dbms/src/Common/TaskStatsInfoGetter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 438be6912e0..df883d09dc4 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -61,13 +61,13 @@ struct NetlinkMessage } }; - alignas(NLMSG_ALIGNTO) union + union alignas(NLMSG_ALIGNTO) { struct { ::genlmsghdr generic_header; - alignas(NLMSG_ALIGNTO) union + union alignas(NLMSG_ALIGNTO) { char buf[MAX_MSG_SIZE]; Attribute attribute; /// First attribute. There may be more. 
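
Note: the rewrite in patches 067 through 070 replaces the GENLMSG_DATA/NLA_DATA macro arithmetic with a typed Attribute view whose next() advances by the NLA_ALIGN-ed attribute length. The sketch below restates that traversal over a flat attribute list; the hand-built buffer, the attribute type values, and the helper name walkAttributes are invented for illustration, and error handling is reduced to a bounds check.

    #include <linux/netlink.h>
    #include <cstddef>
    #include <cstdio>

    /// Walk a flat list of netlink attributes, advancing by the aligned attribute length,
    /// the same stepping rule as NetlinkMessage::Attribute::next() above.
    static void walkAttributes(const char * payload, size_t payload_size)
    {
        const char * pos = payload;
        const char * end = payload + payload_size;

        while (pos + NLA_HDRLEN <= end)
        {
            const ::nlattr * attr = reinterpret_cast<const ::nlattr *>(pos);
            if (attr->nla_len < NLA_HDRLEN)
                break; /// Malformed attribute; a real parser should report an error.

            std::printf("attribute: type %u, len %u\n", attr->nla_type, attr->nla_len);
            pos += NLA_ALIGN(attr->nla_len); /// nla_len counts header plus payload, excluding padding.
        }
    }

    int main()
    {
        /// Two hand-built, payload-less attributes, just enough to drive the loop.
        alignas(::nlattr) char buf[2 * NLA_ALIGN(NLA_HDRLEN)] = {};

        auto * first = reinterpret_cast<::nlattr *>(buf);
        first->nla_len = NLA_HDRLEN;
        first->nla_type = 1;

        auto * second = reinterpret_cast<::nlattr *>(buf + NLA_ALIGN(NLA_HDRLEN));
        second->nla_len = NLA_HDRLEN;
        second->nla_type = 2;

        walkAttributes(buf, sizeof(buf));
        return 0;
    }

Nested attributes (TASKSTATS_TYPE_AGGR_PID wrapping TASKSTATS_TYPE_STATS) are walked the same way one level deeper, which is what getStat() above does.
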
From fc9d335d417fef4aaaf52ac8b476cdc68b53de63 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 06:33:46 +0300 Subject: [PATCH 071/192] Fixed error when empty part is generated after merge or mutation #2830 --- .../MergeTree/MergedBlockOutputStream.cpp | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index fd2fdb50897..98de7b0399c 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -298,16 +298,19 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( MergeTreeData::DataPart::Checksums * additional_column_checksums) { /// Finish columns serialization. - auto & settings = storage.context.getSettingsRef(); - IDataType::SerializeBinaryBulkSettings serialize_settings; - serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; - serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part != 0; - OffsetColumns offset_columns; - auto it = columns_list.begin(); - for (size_t i = 0; i < columns_list.size(); ++i, ++it) + if (!serialization_states.empty()) { - serialize_settings.getter = createStreamGetter(it->name, offset_columns, false); - it->type->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[i]); + auto & settings = storage.context.getSettingsRef(); + IDataType::SerializeBinaryBulkSettings serialize_settings; + serialize_settings.low_cardinality_max_dictionary_size = settings.low_cardinality_max_dictionary_size; + serialize_settings.low_cardinality_use_single_dictionary_for_part = settings.low_cardinality_use_single_dictionary_for_part != 0; + OffsetColumns offset_columns; + auto it = columns_list.begin(); + for (size_t i = 0; i < columns_list.size(); ++i, ++it) + { + serialize_settings.getter = createStreamGetter(it->name, offset_columns, false); + it->type->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[i]); + } } if (!total_column_list) From ca5b83ac399554731f421ad0cb85a6484cb1ebbf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 06:58:19 +0300 Subject: [PATCH 072/192] Fixed error when BackgroundSchedulePool is initialized in context of a query #2482 --- dbms/src/Common/ThreadStatus.h | 1 - dbms/src/Core/BackgroundSchedulePool.cpp | 25 +++++++---- .../MergeTree/BackgroundProcessingPool.cpp | 42 +++++++++++-------- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index b550f416558..045cfde3a46 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -33,7 +33,6 @@ using InternalTextLogsQueueWeakPtr = std::weak_ptr; class ThreadGroupStatus { public: - mutable std::shared_mutex mutex; ProfileEvents::Counters performance_counters{VariableContext::Process}; diff --git a/dbms/src/Core/BackgroundSchedulePool.cpp b/dbms/src/Core/BackgroundSchedulePool.cpp index 9cdec4087a4..e553b1e7623 100644 --- a/dbms/src/Core/BackgroundSchedulePool.cpp +++ b/dbms/src/Core/BackgroundSchedulePool.cpp @@ -143,12 +143,6 @@ BackgroundSchedulePool::BackgroundSchedulePool(size_t size) { LOG_INFO(&Logger::get("BackgroundSchedulePool"), "Create BackgroundSchedulePool with " << size << " threads"); - /// Put all threads of both thread pools to one thread group - /// The master thread 
exits immediately - CurrentThread::initializeQuery(); - thread_group = CurrentThread::getGroup(); - CurrentThread::detachQuery(); - threads.resize(size); for (auto & thread : threads) thread = std::thread([this] { threadFunction(); }); @@ -221,10 +215,23 @@ void BackgroundSchedulePool::threadFunction() { setThreadName("BackgrSchedPool"); - /// Put all threads to one thread pool - CurrentThread::attachTo(thread_group); - SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); + { + std::lock_guard lock(delayed_tasks_mutex); + if (thread_group) + { + /// Put all threads to one thread pool + CurrentThread::attachTo(thread_group); + } + else + { + CurrentThread::initializeQuery(); + thread_group = CurrentThread::getGroup(); + } + } + + /// Put all threads to one thread pool + SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); CurrentThread::getMemoryTracker().setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool); while (!shutdown) diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp index c7e49f59e7a..d7a0294a1ab 100644 --- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp +++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp @@ -36,7 +36,7 @@ void BackgroundProcessingPoolTaskInfo::wake() Poco::Timestamp current_time; { - std::unique_lock lock(pool.tasks_mutex); + std::unique_lock lock(pool.tasks_mutex); auto next_time_to_execute = iterator->first; auto this_task_handle = iterator->second; @@ -58,12 +58,6 @@ BackgroundProcessingPool::BackgroundProcessingPool(int size_) : size(size_) { LOG_INFO(&Logger::get("BackgroundProcessingPool"), "Create BackgroundProcessingPool with " << size << " threads"); - /// Put all threads to one thread group - /// The master thread exits immediately - CurrentThread::initializeQuery(); - thread_group = CurrentThread::getGroup(); - CurrentThread::detachQuery(); - threads.resize(size); for (auto & thread : threads) thread = std::thread([this] { threadFunction(); }); @@ -77,7 +71,7 @@ BackgroundProcessingPool::TaskHandle BackgroundProcessingPool::addTask(const Tas Poco::Timestamp current_time; { - std::unique_lock lock(tasks_mutex); + std::unique_lock lock(tasks_mutex); res->iterator = tasks.emplace(current_time, res); } @@ -93,11 +87,11 @@ void BackgroundProcessingPool::removeTask(const TaskHandle & task) /// Wait for all executions of this task. 
{ - std::unique_lock wlock(task->rwlock); + std::unique_lock wlock(task->rwlock); } { - std::unique_lock lock(tasks_mutex); + std::unique_lock lock(tasks_mutex); tasks.erase(task->iterator); } } @@ -122,10 +116,22 @@ void BackgroundProcessingPool::threadFunction() { setThreadName("BackgrProcPool"); - /// Put all threads to one thread pool - CurrentThread::attachTo(thread_group); - SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); + { + std::lock_guard lock(tasks_mutex); + if (thread_group) + { + /// Put all threads to one thread pool + CurrentThread::attachTo(thread_group); + } + else + { + CurrentThread::initializeQuery(); + thread_group = CurrentThread::getGroup(); + } + } + + SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); CurrentThread::getMemoryTracker().setMetric(CurrentMetrics::MemoryTrackingInBackgroundProcessingPool); pcg64 rng(randomSeed()); @@ -141,7 +147,7 @@ void BackgroundProcessingPool::threadFunction() Poco::Timestamp min_time; { - std::unique_lock lock(tasks_mutex); + std::unique_lock lock(tasks_mutex); if (!tasks.empty()) { @@ -162,7 +168,7 @@ void BackgroundProcessingPool::threadFunction() if (!task) { - std::unique_lock lock(tasks_mutex); + std::unique_lock lock(tasks_mutex); wake_event.wait_for(lock, std::chrono::duration(sleep_seconds + std::uniform_real_distribution(0, sleep_seconds_random_part)(rng))); @@ -173,12 +179,12 @@ void BackgroundProcessingPool::threadFunction() Poco::Timestamp current_time; if (min_time > current_time) { - std::unique_lock lock(tasks_mutex); + std::unique_lock lock(tasks_mutex); wake_event.wait_for(lock, std::chrono::microseconds( min_time - current_time + std::uniform_int_distribution(0, sleep_seconds_random_part * 1000000)(rng))); } - std::shared_lock rlock(task->rwlock); + std::shared_lock rlock(task->rwlock); if (task->removed) continue; @@ -202,7 +208,7 @@ void BackgroundProcessingPool::threadFunction() Poco::Timestamp next_time_to_execute = Poco::Timestamp() + (done_work ? 
0 : sleep_seconds * 1000000); { - std::unique_lock lock(tasks_mutex); + std::unique_lock lock(tasks_mutex); if (task->removed) continue; From deb3ce534c7e08c51f5d972e073dc730e9fc94c1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 07:04:02 +0300 Subject: [PATCH 073/192] Fixed error when BackgroundSchedulePool is initialized in context of a query #2482 --- dbms/src/Core/BackgroundSchedulePool.cpp | 37 ++++++++++++------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/dbms/src/Core/BackgroundSchedulePool.cpp b/dbms/src/Core/BackgroundSchedulePool.cpp index e553b1e7623..c9abf559092 100644 --- a/dbms/src/Core/BackgroundSchedulePool.cpp +++ b/dbms/src/Core/BackgroundSchedulePool.cpp @@ -211,26 +211,28 @@ void BackgroundSchedulePool::cancelDelayedTask(const TaskInfoPtr & task, std::lo } +void BackgroundSchedulePool::attachToThreadGroup() +{ + std::lock_guard lock(delayed_tasks_mutex); + + if (thread_group) + { + /// Put all threads to one thread pool + CurrentThread::attachTo(thread_group); + } + else + { + CurrentThread::initializeQuery(); + thread_group = CurrentThread::getGroup(); + } +} + + void BackgroundSchedulePool::threadFunction() { setThreadName("BackgrSchedPool"); - { - std::lock_guard lock(delayed_tasks_mutex); - - if (thread_group) - { - /// Put all threads to one thread pool - CurrentThread::attachTo(thread_group); - } - else - { - CurrentThread::initializeQuery(); - thread_group = CurrentThread::getGroup(); - } - } - - /// Put all threads to one thread pool + attachToThreadGroup(); SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); CurrentThread::getMemoryTracker().setMetric(CurrentMetrics::MemoryTrackingInBackgroundSchedulePool); @@ -249,8 +251,7 @@ void BackgroundSchedulePool::delayExecutionThreadFunction() { setThreadName("BckSchPoolDelay"); - /// Put all threads to one thread pool - CurrentThread::attachTo(thread_group); + attachToThreadGroup(); SCOPE_EXIT({ CurrentThread::detachQueryIfNotDetached(); }); while (!shutdown) From 6834401118626ce4c1d925aed6c2504cd07dbb2d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 07:04:39 +0300 Subject: [PATCH 074/192] Fixed error when BackgroundSchedulePool is initialized in context of a query #2482 --- dbms/src/Core/BackgroundSchedulePool.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Core/BackgroundSchedulePool.h b/dbms/src/Core/BackgroundSchedulePool.h index f55cc95dbbc..b7aa1592c19 100644 --- a/dbms/src/Core/BackgroundSchedulePool.h +++ b/dbms/src/Core/BackgroundSchedulePool.h @@ -142,6 +142,8 @@ private: /// Thread group used for profiling purposes ThreadGroupStatusPtr thread_group; + + void attachToThreadGroup(); }; using BackgroundSchedulePoolPtr = std::shared_ptr; From 5435dc3dc2d1312bbbeb378b8de59515ecea5daf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 07:32:21 +0300 Subject: [PATCH 075/192] Fixed error with thread statuses #2482 --- .../MergingAggregatedMemoryEfficientBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 022366cbc04..608bc06b713 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -300,7 +300,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(ThreadGroupSt try { if 
(thread_group) - CurrentThread::attachTo(thread_group); + CurrentThread::attachToIfDetached(thread_group); setThreadName("MergeAggMergThr"); while (!parallel_merge_data->finish) From d355aa3932044c1f38d5a55d9f4812ac96364e04 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 07:36:53 +0300 Subject: [PATCH 076/192] Enlarged the size of system log queue 1000 times. It is reasonable, because multiple records are generated for each query for query_thread_log. The maximum size of system log queue will be few hundred MBs #2482 --- dbms/src/Interpreters/SystemLog.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/SystemLog.h b/dbms/src/Interpreters/SystemLog.h index 5bf3dcfd200..21c0cd8ba1c 100644 --- a/dbms/src/Interpreters/SystemLog.h +++ b/dbms/src/Interpreters/SystemLog.h @@ -51,7 +51,7 @@ namespace DB */ -#define DBMS_SYSTEM_LOG_QUEUE_SIZE 1024 +#define DBMS_SYSTEM_LOG_QUEUE_SIZE 1048576 class Context; class QueryLog; From a1d745888d5e1630807880af991f811b4e14c03d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 08:56:06 +0300 Subject: [PATCH 077/192] Dummy implementation for non-Linux platforms #2482 --- dbms/programs/server/Server.cpp | 4 ++ dbms/src/Common/TaskStatsInfoGetter.cpp | 53 ++++++++++++++++--- dbms/src/Common/TaskStatsInfoGetter.h | 8 ++- dbms/src/Common/ThreadProfileEvents.h | 19 +++++++ dbms/src/IO/AIO.cpp | 2 +- dbms/src/IO/AIO.h | 2 +- dbms/src/IO/AIOContextPool.cpp | 2 +- dbms/src/IO/AIOContextPool.h | 2 +- dbms/src/IO/ReadBufferAIO.cpp | 2 +- dbms/src/IO/ReadBufferAIO.h | 2 +- dbms/src/IO/WriteBufferAIO.cpp | 2 +- dbms/src/IO/WriteBufferAIO.h | 2 +- dbms/src/IO/createReadBufferFromFileBase.cpp | 8 +-- dbms/src/IO/createWriteBufferFromFileBase.cpp | 8 +-- dbms/src/Interpreters/tests/CMakeLists.txt | 6 ++- 15 files changed, 96 insertions(+), 26 deletions(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index b22ab82559d..17f49ef6ddd 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -366,12 +366,16 @@ int Server::main(const std::vector & /*args*/) dns_cache_updater = std::make_unique(*global_context); } +#if defined(__linux__) if (!TaskStatsInfoGetter::checkPermissions()) { LOG_INFO(log, "It looks like the process has no CAP_NET_ADMIN capability, some performance statistics will be disabled." " It could happen due to incorrect ClickHouse package installation." " You could resolve the problem manually with 'sudo setcap cap_net_admin=+ep /usr/bin/clickhouse'"); } +#else + LOG_INFO(log, "TaskStats is not implemented for this OS. IO accounting will be disabled."); +#endif { Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0); diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index df883d09dc4..07bf502d1ac 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -2,20 +2,22 @@ #include #include +#include + +#if defined(__linux__) + #include #include -#include -#include -#include -#include #include #include #include #include -#include -#include #include +#include +#include +#include +#include /// Basic idea is motivated by "iotop" tool. 
@@ -283,3 +285,42 @@ TaskStatsInfoGetter::~TaskStatsInfoGetter() } } + + +#else + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +bool TaskStatsInfoGetter::checkPermissions() +{ + return false; +} + + +TaskStatsInfoGetter::TaskStatsInfoGetter() +{ + throw Exception("TaskStats are not implemented for this OS.", ErrorCodes::NOT_IMPLEMENTED); +} + +void TaskStatsInfoGetter::getStat(::taskstats &, pid_t) +{ +} + +pid_t TaskStatsInfoGetter::getCurrentTID() +{ + return 0; +} + +TaskStatsInfoGetter::~TaskStatsInfoGetter() +{ +} + +} + +#endif diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index f086a884138..4ff5b94da37 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -4,7 +4,11 @@ #include #include +#if defined(__linux__) struct taskstats; +#else +struct taskstats {}; +#endif namespace DB @@ -25,11 +29,11 @@ public: /// Whether the current process has permissions (sudo or cap_net_admin capabilties) to get taskstats info static bool checkPermissions(); +#if defined(__linux__) private: - void init(); - int netlink_socket_fd = -1; UInt16 taskstats_family_id = 0; +#endif }; } diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 1afcdf98ccc..106cfa07957 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -1,10 +1,14 @@ #pragma once + #include #include #include #include + +#if defined(__linux__) #include +#endif namespace ProfileEvents @@ -17,6 +21,7 @@ namespace ProfileEvents extern const Event VoluntaryContextSwitches; extern const Event InvoluntaryContextSwitches; +#if defined(__linux__) extern const Event OSIOWaitMicroseconds; extern const Event OSCPUWaitMicroseconds; extern const Event OSCPUVirtualTimeMicroseconds; @@ -24,6 +29,7 @@ namespace ProfileEvents extern const Event OSWriteChars; extern const Event OSReadBytes; extern const Event OSWriteBytes; +#endif } @@ -105,6 +111,8 @@ struct RUsageCounters }; +#if defined(__linux__) + struct TasksStatsCounters { ::taskstats stat; @@ -140,4 +148,15 @@ struct TasksStatsCounters } }; +#else + +struct TasksStatsCounters +{ + static TasksStatsCounters current(); + static void incrementProfileEvents(const TasksStatsCounters &, const TasksStatsCounters &, ProfileEvents::Counters &) {} + static void updateProfileEvents(TasksStatsCounters &, ProfileEvents::Counters &) {} +}; + +#endif + } diff --git a/dbms/src/IO/AIO.cpp b/dbms/src/IO/AIO.cpp index 9c8160919f1..e73319319b1 100644 --- a/dbms/src/IO/AIO.cpp +++ b/dbms/src/IO/AIO.cpp @@ -1,4 +1,4 @@ -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/AIO.h b/dbms/src/IO/AIO.h index e21a316a770..1520db74187 100644 --- a/dbms/src/IO/AIO.h +++ b/dbms/src/IO/AIO.h @@ -1,6 +1,6 @@ #pragma once -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) /// https://stackoverflow.com/questions/20759750/resolving-redefinition-of-timespec-in-time-h #define timespec linux_timespec diff --git a/dbms/src/IO/AIOContextPool.cpp b/dbms/src/IO/AIOContextPool.cpp index 336c03be7dd..1251bb651b3 100644 --- a/dbms/src/IO/AIOContextPool.cpp +++ b/dbms/src/IO/AIOContextPool.cpp @@ -1,4 +1,4 @@ -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/AIOContextPool.h b/dbms/src/IO/AIOContextPool.h index 
3e1c4a039d7..8a2d3e4adbe 100644 --- a/dbms/src/IO/AIOContextPool.h +++ b/dbms/src/IO/AIOContextPool.h @@ -1,6 +1,6 @@ #pragma once -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/ReadBufferAIO.cpp b/dbms/src/IO/ReadBufferAIO.cpp index 9243b65e48e..ca50e11db7e 100644 --- a/dbms/src/IO/ReadBufferAIO.cpp +++ b/dbms/src/IO/ReadBufferAIO.cpp @@ -1,4 +1,4 @@ -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/ReadBufferAIO.h b/dbms/src/IO/ReadBufferAIO.h index 77e35f8e35a..a30057565c0 100644 --- a/dbms/src/IO/ReadBufferAIO.h +++ b/dbms/src/IO/ReadBufferAIO.h @@ -1,6 +1,6 @@ #pragma once -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/WriteBufferAIO.cpp b/dbms/src/IO/WriteBufferAIO.cpp index dcd42e3c8fe..e8da7a17add 100644 --- a/dbms/src/IO/WriteBufferAIO.cpp +++ b/dbms/src/IO/WriteBufferAIO.cpp @@ -1,4 +1,4 @@ -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/WriteBufferAIO.h b/dbms/src/IO/WriteBufferAIO.h index f5b01637471..7b8d275dfcd 100644 --- a/dbms/src/IO/WriteBufferAIO.h +++ b/dbms/src/IO/WriteBufferAIO.h @@ -1,6 +1,6 @@ #pragma once -#if !(defined(__FreeBSD__) || defined(__APPLE__) || defined(_MSC_VER)) +#if defined(__linux__) #include #include diff --git a/dbms/src/IO/createReadBufferFromFileBase.cpp b/dbms/src/IO/createReadBufferFromFileBase.cpp index beb73eda861..b16189c9e5d 100644 --- a/dbms/src/IO/createReadBufferFromFileBase.cpp +++ b/dbms/src/IO/createReadBufferFromFileBase.cpp @@ -1,6 +1,6 @@ #include #include -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) +#if defined(__linux__) #include #endif #include @@ -14,10 +14,10 @@ namespace ProfileEvents namespace DB { -#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_MSC_VER) +#if !defined(__linux__) namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; + extern const int NOT_IMPLEMENTED; } #endif @@ -31,7 +31,7 @@ std::unique_ptr createReadBufferFromFileBase(const std:: } else { -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) +#if defined(__linux__) ProfileEvents::increment(ProfileEvents::CreatedReadBufferAIO); return std::make_unique(filename_, buffer_size_, flags_, existing_memory_); #else diff --git a/dbms/src/IO/createWriteBufferFromFileBase.cpp b/dbms/src/IO/createWriteBufferFromFileBase.cpp index de06e2eb1cb..b5670b0b16b 100644 --- a/dbms/src/IO/createWriteBufferFromFileBase.cpp +++ b/dbms/src/IO/createWriteBufferFromFileBase.cpp @@ -1,6 +1,6 @@ #include #include -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) +#if defined(__linux__) #include #endif #include @@ -15,10 +15,10 @@ namespace ProfileEvents namespace DB { -#if defined(__APPLE__) || defined(__FreeBSD__) || defined(_MSC_VER) +#if !defined(__linux__) namespace ErrorCodes { - extern const int NOT_IMPLEMENTED; + extern const int NOT_IMPLEMENTED; } #endif @@ -33,7 +33,7 @@ WriteBufferFromFileBase * createWriteBufferFromFileBase(const std::string & file } else { -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(_MSC_VER) +#if defined(__linux__) ProfileEvents::increment(ProfileEvents::CreatedWriteBufferAIO); return new WriteBufferAIO(filename_, buffer_size_, flags_, mode, existing_memory_); #else diff --git 
a/dbms/src/Interpreters/tests/CMakeLists.txt b/dbms/src/Interpreters/tests/CMakeLists.txt index 04808feb926..fb79250cd7c 100644 --- a/dbms/src/Interpreters/tests/CMakeLists.txt +++ b/dbms/src/Interpreters/tests/CMakeLists.txt @@ -48,5 +48,7 @@ add_check(in_join_subqueries_preprocessor) add_executable (users users.cpp) target_link_libraries (users dbms ${Boost_FILESYSTEM_LIBRARY}) -add_executable (internal_iotop internal_iotop.cpp) -target_link_libraries (internal_iotop dbms) +if (OS_LINUX) + add_executable (internal_iotop internal_iotop.cpp) + target_link_libraries (internal_iotop dbms) +endif () From bc30d10b67f729eeae50ca8409e2920bfb7ecb95 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 08:59:27 +0300 Subject: [PATCH 078/192] Dummy implementation for non-Linux platforms #2482 --- dbms/src/Common/ThreadProfileEvents.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 106cfa07957..661597af97b 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -152,6 +152,8 @@ struct TasksStatsCounters struct TasksStatsCounters { + ::taskstats stat; + static TasksStatsCounters current(); static void incrementProfileEvents(const TasksStatsCounters &, const TasksStatsCounters &, ProfileEvents::Counters &) {} static void updateProfileEvents(TasksStatsCounters &, ProfileEvents::Counters &) {} From 6497d3032e867e1db880e2e7a010a27bbe869c4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 22 Aug 2018 09:00:08 +0300 Subject: [PATCH 079/192] Dummy implementation for non-Linux platforms #2482 --- dbms/src/Common/ThreadProfileEvents.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 661597af97b..511f2f33e22 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -154,7 +154,7 @@ struct TasksStatsCounters { ::taskstats stat; - static TasksStatsCounters current(); + static TasksStatsCounters current() { return {}; } static void incrementProfileEvents(const TasksStatsCounters &, const TasksStatsCounters &, ProfileEvents::Counters &) {} static void updateProfileEvents(TasksStatsCounters &, ProfileEvents::Counters &) {} }; From d258c2f5e34302bb02579fca941b22162876b53c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:18:29 +0300 Subject: [PATCH 080/192] Update rounding_functions.md --- docs/ru/query_language/functions/rounding_functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/functions/rounding_functions.md b/docs/ru/query_language/functions/rounding_functions.md index 540e8ff4abe..849c35013b9 100644 --- a/docs/ru/query_language/functions/rounding_functions.md +++ b/docs/ru/query_language/functions/rounding_functions.md @@ -20,7 +20,7 @@ N может быть отрицательным. Реализует [банковское округление](https://en.wikipedia.org/wiki/Rounding#Round_half_to_even), т.е. округление до ближайшего чётного. -**Входные параметры** +**Аргументы функции** - `x` — число для округления. [Тип](../../data_types/index.md#data_types) — любой числовой. - `N` — позиция цифры после запятой, до которой следует округлять. 
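
Note: the documentation patch above touches the description of a rounding function that implements banker's rounding (round half to even). As an illustration of the concept only, not of ClickHouse's implementation: the default IEEE 754 rounding mode breaks ties to even in the same way.

    #include <cfenv>
    #include <cmath>
    #include <cstdio>
    #include <initializer_list>

    int main()
    {
        /// Round-to-nearest-even is the default IEEE 754 mode; set it explicitly for clarity.
        std::fesetround(FE_TONEAREST);

        for (double x : {0.5, 1.5, 2.5, 3.5})
            std::printf("%.1f -> %.0f\n", x, std::nearbyint(x));

        /// Prints 0, 2, 2, 4: exact halves go to the nearest even integer.
        return 0;
    }
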
From 34fafdb9015d8d0b34c2da013e5f66765374e219 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:20:05 +0300 Subject: [PATCH 081/192] Update file.md --- docs/ru/query_language/table_functions/file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/table_functions/file.md b/docs/ru/query_language/table_functions/file.md index 36338395d35..b2ad6d08c04 100644 --- a/docs/ru/query_language/table_functions/file.md +++ b/docs/ru/query_language/table_functions/file.md @@ -2,7 +2,7 @@ # file -Создает таблицу из файла. +Создаёт таблицу из файла. ``` file(path, format, structure) From 33f3841450730df1d78791bb29b7439924b818a3 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:22:06 +0300 Subject: [PATCH 082/192] Update file.md --- docs/ru/query_language/table_functions/file.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/query_language/table_functions/file.md b/docs/ru/query_language/table_functions/file.md index b2ad6d08c04..c1180e5dd53 100644 --- a/docs/ru/query_language/table_functions/file.md +++ b/docs/ru/query_language/table_functions/file.md @@ -11,8 +11,8 @@ file(path, format, structure) **Входные параметры** - `path` — относительный путь до файла от [user_files_path](../../operations/server_settings/settings.md#user_files_path). -- `format` — [формат](../../interfaces/formats.md#formats) файла. Поддерживаются те же форматы, что и для запроса `INSERT`. -- `structure` — структура таблицы. Формат `'colunm-1-name column-1-data-type, colunm-2-name column-2-data-type, ...'`. +- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. +- `format` — [формат](../../interfaces/formats.md#formats) файла. **Возвращаемое значение** Таблица с указанной структурой и данными из указанного файла. **Пример** From bcd862ab7449e7f948d513b03922c27899143e91 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:23:28 +0300 Subject: [PATCH 083/192] Update file.md --- docs/ru/query_language/table_functions/file.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/query_language/table_functions/file.md b/docs/ru/query_language/table_functions/file.md index c1180e5dd53..9e029a6b729 100644 --- a/docs/ru/query_language/table_functions/file.md +++ b/docs/ru/query_language/table_functions/file.md @@ -16,7 +16,7 @@ file(path, format, structure) **Возвращаемое значение** -Таблица с указанной структурой и данными из указанного файла. +Таблица с указанной структурой, предназначенная для чтения или записи данных в указанном файле. **Пример** From 90788dbd2eb1a0df294c9f7cb3e02b388fbbdac1 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:29:36 +0300 Subject: [PATCH 084/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index d9c239030d6..830daa0e9e7 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -19,7 +19,7 @@ - Поддерживает репликацию данных. - Для этого требуется [преобразование](replication.md#convert-mergetree-to-replicated) `MergeTree` к `ReplicatedMergeTree`. Подробнее читайте в разделе [Репликация данных](replication.md#table_engines-replication). + Для этого используется семейство таблиц `ReplicatedMergeTree`. Подробнее читайте в разделе [Репликация данных](replication.md#table_engines-replication). 
- Поддерживает сэмплирование данных. From df8bf949d086c2cbece7ece9fcf75b03e904a01b Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:30:23 +0300 Subject: [PATCH 085/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 830daa0e9e7..dbd9b1247e6 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -26,9 +26,6 @@ При необходимости можно задать способ сэмплирования данных в таблице. -!!!attention -При создании таблицы помните, что она обязательно должна содержать столбец с датой типа [Date](../../data_types/date.md#data_type-date). - ## Конфигурирование движка при создании таблицы ``` From 6de5f7df1eff0f7396ae1e6ab592ab20b7d1f0bf Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:32:16 +0300 Subject: [PATCH 086/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index dbd9b1247e6..df2104d48fe 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -42,8 +42,9 @@ ENGINE [=] MergeTree() [PARTITION BY expr] [ORDER BY expr] [SAMPLE BY expr] [SET Тип — [Tuple()](../../data_types/tuple.md#data_type-tuple). Может состоять из произвольных выражений, но обычно это кортеж столбцов. Обязательно должен включать в себя выражение для сэмплирования, если оно задано. -- `SAMPLE BY` — выражение для сэмплирования. -- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`: +- `SAMPLE BY` — выражение для сэмплирования (не обязательно). + +- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (не обязательно): - `index_granularity` — гранулярность индекса. Число строк данных между «засечками» индекса. По умолчанию — 8192. From 33479b6d6c47dd6b3c6af21afa19e14609a78bde Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:37:05 +0300 Subject: [PATCH 087/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index df2104d48fe..841881f1974 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -40,9 +40,10 @@ ENGINE [=] MergeTree() [PARTITION BY expr] [ORDER BY expr] [SAMPLE BY expr] [SET - `ORDER BY` — первичный ключ. - Тип — [Tuple()](../../data_types/tuple.md#data_type-tuple). Может состоять из произвольных выражений, но обычно это кортеж столбцов. Обязательно должен включать в себя выражение для сэмплирования, если оно задано. + Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounterID, EventDate)`. + Если используется ключ сэмплирования, то первичный ключ должен содержать его. Пример: `ORDER BY (CounterID, EventDate, intHash32(UserID))`. -- `SAMPLE BY` — выражение для сэмплирования (не обязательно). +- `SAMPLE BY` — выражение для сэмплирования (не обязательно). Пример: `intHash32(UserID)`. 
- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (не обязательно): From 622c4b84d95d149e12dc4581fc20d73e3fd6eaa2 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:37:35 +0300 Subject: [PATCH 088/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 841881f1974..a1c45bfc09b 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -34,15 +34,15 @@ ENGINE [=] MergeTree() [PARTITION BY expr] [ORDER BY expr] [SAMPLE BY expr] [SET **Секции ENGINE** -- `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md#table_engines-custom_partitioning_key). - - Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../data_types/date.md#data_type-date). В этом случае имена партиций имеют формат `"YYYYMM"`. - - `ORDER BY` — первичный ключ. Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounterID, EventDate)`. Если используется ключ сэмплирования, то первичный ключ должен содержать его. Пример: `ORDER BY (CounterID, EventDate, intHash32(UserID))`. +- `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md#table_engines-custom_partitioning_key). + + Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../data_types/date.md#data_type-date). В этом случае имена партиций имеют формат `"YYYYMM"`. + - `SAMPLE BY` — выражение для сэмплирования (не обязательно). Пример: `intHash32(UserID)`. - `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (не обязательно): From 87fb9cc997393c26fb0a351c18781c248bc1de12 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:38:01 +0300 Subject: [PATCH 089/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index a1c45bfc09b..54021cd0c62 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -36,7 +36,7 @@ ENGINE [=] MergeTree() [PARTITION BY expr] [ORDER BY expr] [SAMPLE BY expr] [SET - `ORDER BY` — первичный ключ. - Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounterID, EventDate)`. + Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounterID, EventDate)`. Если используется ключ сэмплирования, то первичный ключ должен содержать его. Пример: `ORDER BY (CounterID, EventDate, intHash32(UserID))`. - `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md#table_engines-custom_partitioning_key). 
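Documentation patches 085-089 above converge on the new-style `ENGINE` clause syntax for `MergeTree`. Taken together they describe DDL of roughly the following shape; this is a sketch only, with illustrative table and column names that are not taken from the patches themselves:

```sql
CREATE TABLE hits
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)  -- monthly partitions named like 'YYYYMM'
ORDER BY (CounterID, EventDate, intHash32(UserID))  -- primary key; must contain the sampling key
SAMPLE BY intHash32(UserID)  -- optional sampling expression
SETTINGS index_granularity = 8192;  -- optional; 8192 is the documented default
```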
From 4c0b30fb7d21efe65304afe7df19e1fa30c27088 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 09:44:04 +0300 Subject: [PATCH 090/192] Update mergetree.md --- docs/ru/operations/table_engines/mergetree.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 54021cd0c62..4d035bcc216 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -87,15 +87,15 @@ MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID) ## Хранение данных -Таблица хранится блоками данных, отсортированных по первичному ключу. Каждый блок маркируется максимальной и минимальной датами хранящихся в нём записей. +Таблица состоит из *кусков* данных (data parts), отсортированных по первичному ключу. -При вставке в таблицу создаются отдельные блоки данных, каждый из которых лексикографически отсортирован по первичному ключу. Например, если первичный ключ — `(CounterID, Date)`, то данные в блоке будут лежать в порядке `CounterID`, а для каждого `CounterID` в порядке `Date`. +При вставке в таблицу создаются отдельные куски данных, каждый из которых лексикографически отсортирован по первичному ключу. Например, если первичный ключ — `(CounterID, Date)`, то данные в куске будут лежать в порядке `CounterID`, а для каждого `CounterID` в порядке `Date`. -Данные, относящиеся к разным месяцам разбиваются на разные блоки. В дальнейшем ClickHouse в фоновом режиме объединяет мелкие блоки в более крупные для более эффективного хранения. Блоки, относящиеся к разным месяцам не объединяются, это локализует модификации и упрощает бэкапы. Поддерживается запрос `OPTIMIZE`, который вызывает один внеочередной шаг слияния. +Данные, относящиеся к разным партициям, разбиваются на разные куски. В фоновом режиме ClickHouse выполняет слияния (merge) кусков данных для более эффективного хранения. Куски, относящиеся к разным партициям, не объединяются. -Для каждого блока данных ClickHouse создаёт индексный файл, который содержит значение первичного ключа для каждой индексной строки («засечка»). Номера индексных строк определяются как `n * index_granularity`, а максимальное значение `n` равно целой части от деления общего количества строк на `index_granularity`. Для каждого столбца также пишутся «засечки» для тех же индексных строк, что и для первичного ключа, эти «засечки» позволяют находить непосредственно данные в столбцах. +Для каждого куска данных ClickHouse создаёт индексный файл, который содержит значение первичного ключа для каждой индексной строки («засечка»). Номера индексных строк определяются как `n * index_granularity`, а максимальное значение `n` равно целой части от деления общего количества строк на `index_granularity`. Для каждого столбца также пишутся «засечки» для тех же индексных строк, что и для первичного ключа, эти «засечки» позволяют находить непосредственно данные в столбцах. -Вы можете использовать одну большую таблицу, постоянно добавляя в неё данные небольшими пачками, именно для этого предназначен движок `MergeTree`. +Вы можете использовать одну большую таблицу, постоянно добавляя в неё данные пачками, именно для этого предназначен движок `MergeTree`. 
## Первичные ключи и индексы в запросах From 19ec609d36abb5f9667b3536ad6554ee559f8495 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 22 Aug 2018 15:42:18 +0300 Subject: [PATCH 091/192] add failing tests with materialized columns #2900 [#CLICKHOUSE-3911] --- .../0_stateless/00652_mergetree_mutations.reference | 9 +++++---- .../queries/0_stateless/00652_mergetree_mutations.sh | 7 ++++--- .../00652_replicated_mutations_zookeeper.reference | 9 +++++---- .../0_stateless/00652_replicated_mutations_zookeeper.sh | 9 +++++---- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference index 5341d7b49aa..b3f4780a9f5 100644 --- a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference +++ b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference @@ -1,13 +1,14 @@ Query should fail 1 Query should fail 2 Query involving aliases should fail on submission -2000-01-01 2 b -2000-01-01 5 e -2000-02-01 2 b -2000-02-01 5 e +2000-01-01 2 b 4 +2000-01-01 5 e 7 +2000-02-01 2 b 4 +2000-02-01 5 e 7 mutation_1.txt DELETE WHERE x = 1 [''] [1] 0 1 mutation_5.txt DELETE WHERE (x % 2) = 1 [''] [5] 0 1 mutation_6.txt DELETE WHERE s = \'d\' [''] [6] 0 1 +mutation_7.txt DELETE WHERE m = 3 [''] [7] 0 1 *** Test mutations cleaner *** mutation_3.txt DELETE WHERE x = 2 1 mutation_4.txt DELETE WHERE x = 3 1 diff --git a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh index 3df1fe014a4..bb29f6d31fe 100755 --- a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh +++ b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations(d Date, x UInt32, s String, a UInt32 ALIAS x + 1) ENGINE MergeTree(d, intDiv(x, 10), 8192)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations(d Date, x UInt32, s String, a UInt32 ALIAS x + 1, m MATERIALIZED x + 2) ENGINE MergeTree(d, intDiv(x, 10), 8192)" # Test a mutation on empty table ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations DELETE WHERE x = 1" @@ -28,16 +28,17 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations DELETE WHERE a = 0" 2>/ # Delete some values ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations DELETE WHERE x % 2 = 1" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations DELETE WHERE s = 'd'" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations DELETE WHERE m = 3" # Insert more data ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations(d, x, s) VALUES \ ('2000-01-01', 5, 'e'), ('2000-02-01', 5, 'e')" # Wait until the last mutation is done. -wait_for_mutation "mutations" "mutation_6.txt" +wait_for_mutation "mutations" "mutation_7.txt" # Check that the table contains only the data that should not be deleted. -${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations ORDER BY d, x" +${CLICKHOUSE_CLIENT} --query="SELECT d, x, s, m FROM test.mutations ORDER BY d, x" # Check the contents of the system.mutations table. 
${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, block_numbers.partition_id, block_numbers.number, parts_to_do, is_done \ FROM system.mutations WHERE table = 'mutations' ORDER BY mutation_id" diff --git a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference index cb5a52cb905..cfadf2f5f9a 100644 --- a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference @@ -1,12 +1,13 @@ Query should fail 1 Query should fail 2 -2000-01-01 2 b -2000-01-01 5 e -2000-02-01 2 b -2000-02-01 5 e +2000-01-01 2 b 4 +2000-01-01 5 e 7 +2000-02-01 2 b 4 +2000-02-01 5 e 7 0000000000 DELETE WHERE x = 1 [] [] 0 1 0000000001 DELETE WHERE (x % 2) = 1 ['200001','200002'] [2,1] 0 1 0000000002 DELETE WHERE s = \'d\' ['200001','200002'] [3,2] 0 1 +0000000003 DELETE WHERE m = 3 ['200001','200002'] [4,3] 0 1 *** Test mutations cleaner *** 0000000001 DELETE WHERE x = 2 1 0000000002 DELETE WHERE x = 3 1 diff --git a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh index 327b1221482..96f2263afad 100755 --- a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh @@ -8,8 +8,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_r1" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_r2" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_r1(d Date, x UInt32, s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations', 'r1', d, intDiv(x, 10), 8192)" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_r2(d Date, x UInt32, s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations', 'r2', d, intDiv(x, 10), 8192)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_r1(d Date, x UInt32, s String, m MATERIALIZED x + 2) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations', 'r1', d, intDiv(x, 10), 8192)" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_r2(d Date, x UInt32, s String, m MATERIALIZED x + 2) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations', 'r2', d, intDiv(x, 10), 8192)" # Test a mutation on empty table ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_r1 DELETE WHERE x = 1" @@ -28,16 +28,17 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_r1 DELETE WHERE d = '11 # Delete some values ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_r1 DELETE WHERE x % 2 = 1" ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_r1 DELETE WHERE s = 'd'" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_r1 DELETE WHERE m = 3" # Insert more data ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations_r1(d, x, s) VALUES \ ('2000-01-01', 5, 'e'), ('2000-02-01', 5, 'e')" # Wait until the last mutation is done. -wait_for_mutation "mutations_r2" "0000000002" +wait_for_mutation "mutations_r2" "0000000003" # Check that the table contains only the data that should not be deleted. -${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations_r2 ORDER BY d, x" +${CLICKHOUSE_CLIENT} --query="SELECT d, x, s, m FROM test.mutations_r2 ORDER BY d, x" # Check the contents of the system.mutations table. 
${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, block_numbers.partition_id, block_numbers.number, parts_to_do, is_done \ FROM system.mutations WHERE table = 'mutations_r2' ORDER BY mutation_id" From 5c748bfdcef1a4f051dcf96aba2ef29228b684a8 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 22 Aug 2018 15:45:24 +0300 Subject: [PATCH 092/192] fix bug when MATERIALIZED columns were not selected during mutation #2900 [#CLICKHOUSE-3911] --- dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 9d92ae1f157..23a0d3d1943 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include @@ -885,7 +885,8 @@ static BlockInputStreamPtr createInputStreamWithMutatedData( select->select_expression_list = std::make_shared(); select->children.push_back(select->select_expression_list); - select->select_expression_list->children.push_back(std::make_shared()); + for (const auto & column : storage->getColumns().getAllPhysical()) + select->select_expression_list->children.push_back(std::make_shared(column.name)); /// For all commands that are in front of the list and are DELETE commands, we can push them down /// to the SELECT statement and remove them from commands. From b165a53e30db89f54b2f1d5ad0f3a03ad24dc94e Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 22 Aug 2018 15:47:06 +0300 Subject: [PATCH 093/192] Fix reading from partial granule for low cardinality type. #2896 --- dbms/src/DataTypes/DataTypeWithDictionary.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/DataTypes/DataTypeWithDictionary.cpp b/dbms/src/DataTypes/DataTypeWithDictionary.cpp index 8877eb820f1..1fcb6caaf68 100644 --- a/dbms/src/DataTypes/DataTypeWithDictionary.cpp +++ b/dbms/src/DataTypes/DataTypeWithDictionary.cpp @@ -669,6 +669,9 @@ void DataTypeWithDictionary::deserializeBinaryBulkWithMultipleStreams( } }; + if (!settings.continuous_reading) + state_with_dictionary->num_pending_rows = 0; + bool first_dictionary = true; while (limit) { From 409d68b8c0f4ef585fb8a4618647a626e7342654 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 22 Aug 2018 15:49:12 +0300 Subject: [PATCH 094/192] Added test for low cardinality with prewhere. 
#2896 --- .../0_stateless/00688_low_cardinality_prewhere.reference | 1 + .../queries/0_stateless/00688_low_cardinality_prewhere.sql | 6 ++++++ 2 files changed, 7 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.reference create mode 100644 dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql diff --git a/dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.reference b/dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.reference new file mode 100644 index 00000000000..997665f4ef9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.reference @@ -0,0 +1 @@ +2282004189 2282004189 diff --git a/dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql b/dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql new file mode 100644 index 00000000000..adda226c60a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00688_low_cardinality_prewhere.sql @@ -0,0 +1,6 @@ +set allow_experimental_low_cardinality_type = 1; +drop table if exists test.lc_prewhere; +create table test.lc_prewhere (key UInt64, val UInt64, str StringWithDictionary, s String) engine = MergeTree order by key settings index_granularity = 8192; +insert into test.lc_prewhere select number, if(number < 10 or number > 8192 * 9, 1, 0), toString(number) as s, s from system.numbers limit 100000; +select sum(toUInt64(str)), sum(toUInt64(s)) from test.lc_prewhere prewhere val == 1; +drop table if exists test.lc_prewhere; From ed5c5083c80ecac715f40345056b771c602ab04e Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 22 Aug 2018 16:01:50 +0300 Subject: [PATCH 095/192] wip --- dbms/src/Common/TaskStatsInfoGetter.h | 5 ----- dbms/src/Common/ThreadProfileEvents.h | 4 +++- dbms/src/Common/ThreadStatus.cpp | 2 -- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/TaskStatsInfoGetter.h b/dbms/src/Common/TaskStatsInfoGetter.h index 4ff5b94da37..b3e35d65674 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.h +++ b/dbms/src/Common/TaskStatsInfoGetter.h @@ -4,12 +4,7 @@ #include #include -#if defined(__linux__) struct taskstats; -#else -struct taskstats {}; -#endif - namespace DB { diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 511f2f33e22..af36d051159 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -8,6 +8,8 @@ #if defined(__linux__) #include +#else +struct taskstats {}; #endif @@ -154,7 +156,7 @@ struct TasksStatsCounters { ::taskstats stat; - static TasksStatsCounters current() { return {}; } + static TasksStatsCounters current(); static void incrementProfileEvents(const TasksStatsCounters &, const TasksStatsCounters &, ProfileEvents::Counters &) {} static void updateProfileEvents(TasksStatsCounters &, ProfileEvents::Counters &) {} }; diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index 3259b1ec716..0ed242218f5 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -30,9 +30,7 @@ thread_local ThreadStatus::CurrentThreadScope current_thread_scope; TasksStatsCounters TasksStatsCounters::current() { TasksStatsCounters res; -#if __linux__ current_thread->taskstats_getter->getStat(res.stat, current_thread->os_thread_id); -#endif return res; } From d2729f33e6d0bfa6d5ee2bc7452cdc02bd2dfe8a Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 22 Aug 2018 16:12:20 +0300 Subject: [PATCH 096/192] apple fix --- dbms/src/Common/ThreadProfileEvents.h | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index af36d051159..46107f7b38d 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -90,7 +90,11 @@ struct RUsageCounters static RUsageCounters current(UInt64 real_time_ = getCurrentTimeNanoseconds()) { ::rusage rusage; +#if defined(__APPLE__) + ::getrusage(RUSAGE_SELF, &rusage); //TODO? +#else ::getrusage(RUSAGE_THREAD, &rusage); +#endif return RUsageCounters(rusage, real_time_); } From b6aa7542d0f87e00ec9de74db920f563709bf0f4 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 22 Aug 2018 16:43:27 +0300 Subject: [PATCH 097/192] add BackgroundSchedulePool::TaskInfo::activateAndSchedule() method Almost always it (and not activate()) is precisely what is needed. --- dbms/src/Common/ZooKeeper/LeaderElection.h | 3 +- dbms/src/Core/BackgroundSchedulePool.cpp | 38 +++++++++++++------ dbms/src/Core/BackgroundSchedulePool.h | 7 +++- .../ReplicatedMergeTreeAlterThread.h | 6 +-- .../ReplicatedMergeTreeCleanupThread.h | 6 +-- .../ReplicatedMergeTreePartCheckThread.cpp | 3 +- .../ReplicatedMergeTreeRestartingThread.cpp | 6 +-- .../ReplicatedMergeTreeRestartingThread.h | 6 +-- .../Storages/StorageReplicatedMergeTree.cpp | 3 +- 9 files changed, 41 insertions(+), 37 deletions(-) diff --git a/dbms/src/Common/ZooKeeper/LeaderElection.h b/dbms/src/Common/ZooKeeper/LeaderElection.h index 4447c2ccfd2..b1649732db8 100644 --- a/dbms/src/Common/ZooKeeper/LeaderElection.h +++ b/dbms/src/Common/ZooKeeper/LeaderElection.h @@ -85,8 +85,7 @@ private: std::string node_path = node->getPath(); node_name = node_path.substr(node_path.find_last_of('/') + 1); - task->activate(); - task->schedule(); + task->activateAndSchedule(); } void releaseNode() diff --git a/dbms/src/Core/BackgroundSchedulePool.cpp b/dbms/src/Core/BackgroundSchedulePool.cpp index c9abf559092..d9c8bc158ad 100644 --- a/dbms/src/Core/BackgroundSchedulePool.cpp +++ b/dbms/src/Core/BackgroundSchedulePool.cpp @@ -43,17 +43,7 @@ bool BackgroundSchedulePool::TaskInfo::schedule() if (deactivated || scheduled) return false; - scheduled = true; - - if (delayed) - pool.cancelDelayedTask(shared_from_this(), lock); - - /// If the task is not executing at the moment, enqueue it for immediate execution. - /// But if it is currently executing, do nothing because it will be enqueued - /// at the end of the execute() method. - if (!executing) - pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); - + scheduleImpl(lock); return true; } @@ -89,6 +79,18 @@ void BackgroundSchedulePool::TaskInfo::activate() deactivated = false; } +bool BackgroundSchedulePool::TaskInfo::activateAndSchedule() +{ + std::lock_guard lock(schedule_mutex); + + deactivated = false; + if (scheduled) + return false; + + scheduleImpl(lock); + return true; +} + void BackgroundSchedulePool::TaskInfo::execute() { Stopwatch watch; @@ -129,6 +131,20 @@ void BackgroundSchedulePool::TaskInfo::execute() } } +void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard & schedule_mutex_lock) +{ + scheduled = true; + + if (delayed) + pool.cancelDelayedTask(shared_from_this(), schedule_mutex_lock); + + /// If the task is not executing at the moment, enqueue it for immediate execution. + /// But if it is currently executing, do nothing because it will be enqueued + /// at the end of the execute() method. 
+ if (!executing) + pool.queue.enqueueNotification(new TaskNotification(shared_from_this())); +} + zkutil::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback() { return [t = shared_from_this()](const ZooKeeperImpl::ZooKeeper::WatchResponse &) diff --git a/dbms/src/Core/BackgroundSchedulePool.h b/dbms/src/Core/BackgroundSchedulePool.h index b7aa1592c19..f1ee5b67fcf 100644 --- a/dbms/src/Core/BackgroundSchedulePool.h +++ b/dbms/src/Core/BackgroundSchedulePool.h @@ -50,11 +50,14 @@ public: /// Schedule for execution after specified delay. bool scheduleAfter(size_t ms); - /// Further attempts to schedule become no-op. + /// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task. void deactivate(); void activate(); + /// Atomically activate task and schedule it for execution. + bool activateAndSchedule(); + /// get zkutil::WatchCallback needed for notifications from ZooKeeper watches. zkutil::WatchCallback getWatchCallback(); @@ -64,6 +67,8 @@ public: void execute(); + void scheduleImpl(std::lock_guard & schedule_mutex_lock); + BackgroundSchedulePool & pool; std::string log_name; TaskFunc function; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h index 3c64e6b5a5a..44659ea03ac 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeAlterThread.h @@ -23,11 +23,7 @@ class ReplicatedMergeTreeAlterThread public: ReplicatedMergeTreeAlterThread(StorageReplicatedMergeTree & storage_); - void start() - { - task->activate(); - task->schedule(); - } + void start() { task->activateAndSchedule(); } void stop() { task->deactivate(); } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index a4563794b62..d69e20e8c8d 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -23,11 +23,7 @@ class ReplicatedMergeTreeCleanupThread public: ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_); - void start() - { - task->activate(); - task->schedule(); - } + void start() { task->activateAndSchedule(); } void wakeup() { task->schedule(); } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 8efedb423d5..8fd0f33f43c 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -35,8 +35,7 @@ void ReplicatedMergeTreePartCheckThread::start() { std::lock_guard lock(start_stop_mutex); need_stop = false; - task->activate(); - task->schedule(); + task->activateAndSchedule(); } void ReplicatedMergeTreePartCheckThread::stop() diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index bb97aabe691..4e813f16a9a 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -179,10 +179,8 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() storage.partial_shutdown_called = false; storage.partial_shutdown_event.reset(); - storage.queue_updating_task->activate(); - storage.queue_updating_task->schedule(); - 
storage.mutations_updating_task->activate(); - storage.mutations_updating_task->schedule(); + storage.queue_updating_task->activateAndSchedule(); + storage.mutations_updating_task->activateAndSchedule(); storage.cleanup_thread.start(); storage.alter_thread.start(); storage.part_check_thread.start(); diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index 28314a7d2c1..88c6fe755d1 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -24,11 +24,7 @@ class ReplicatedMergeTreeRestartingThread public: ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); - void start() - { - task->activate(); - task->schedule(); - } + void start() { task->activateAndSchedule(); } void wakeup() { task->schedule(); } diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 1ac71665580..74fe877b513 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2411,8 +2411,7 @@ void StorageReplicatedMergeTree::enterLeaderElection() LOG_INFO(log, "Became leader"); is_leader = true; - merge_selecting_task->activate(); - merge_selecting_task->schedule(); + merge_selecting_task->activateAndSchedule(); }; try From 66d49f0abc6073e4446051ed28019dfc3afead9a Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Wed, 22 Aug 2018 16:57:30 +0300 Subject: [PATCH 098/192] start and stop mutations_finalizing_task during replica restart [#CLICKHOUSE-3747] --- .../Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 4e813f16a9a..3e92d8b7a74 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -181,6 +181,7 @@ bool ReplicatedMergeTreeRestartingThread::tryStartup() storage.queue_updating_task->activateAndSchedule(); storage.mutations_updating_task->activateAndSchedule(); + storage.mutations_finalizing_task->activateAndSchedule(); storage.cleanup_thread.start(); storage.alter_thread.start(); storage.part_check_thread.start(); @@ -326,6 +327,7 @@ void ReplicatedMergeTreeRestartingThread::partialShutdown() storage.queue_updating_task->deactivate(); storage.mutations_updating_task->deactivate(); + storage.mutations_finalizing_task->deactivate(); storage.cleanup_thread.stop(); storage.alter_thread.stop(); From 9e485dae1e7fc476a2763e12408f0e1696438444 Mon Sep 17 00:00:00 2001 From: VadimPE Date: Wed, 22 Aug 2018 18:17:40 +0300 Subject: [PATCH 099/192] CLICKHOUSE-3819 add CASE without ELSE --- dbms/src/Parsers/ParserCase.cpp | 16 +++++++++++----- .../00688_case_without_else.reference | 4 ++++ .../0_stateless/00688_case_without_else.sql | 9 +++++++++ 3 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00688_case_without_else.reference create mode 100644 dbms/tests/queries/0_stateless/00688_case_without_else.sql diff --git a/dbms/src/Parsers/ParserCase.cpp b/dbms/src/Parsers/ParserCase.cpp index c2193e98e8f..ed1afd7c204 100644 --- a/dbms/src/Parsers/ParserCase.cpp +++ b/dbms/src/Parsers/ParserCase.cpp @@ -50,12 +50,18 @@ bool ParserCase::parseImpl(Pos & pos, 
ASTPtr & node, Expected & expected) if (!has_branch) return false; - if (!s_else.ignore(pos, expected)) - return false; - ASTPtr expr_else; - if (!p_expr.parse(pos, expr_else, expected)) - return false; + if (s_else.ignore(pos, expected)) + { + if (!p_expr.parse(pos, expr_else, expected)) + return false; + } + else + { + Field field_with_null; + ASTLiteral null_literal(field_with_null); + expr_else = std::make_shared(null_literal); + } args.push_back(expr_else); if (!s_end.ignore(pos, expected)) diff --git a/dbms/tests/queries/0_stateless/00688_case_without_else.reference b/dbms/tests/queries/0_stateless/00688_case_without_else.reference new file mode 100644 index 00000000000..f0aad9ef7ae --- /dev/null +++ b/dbms/tests/queries/0_stateless/00688_case_without_else.reference @@ -0,0 +1,4 @@ +0 +NULL +0 +NULL diff --git a/dbms/tests/queries/0_stateless/00688_case_without_else.sql b/dbms/tests/queries/0_stateless/00688_case_without_else.sql new file mode 100644 index 00000000000..acbb7bfb8f5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00688_case_without_else.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test.test; + +CREATE TABLE test.test (a UInt8) ENGINE = Memory; + +INSERT INTO test.test VALUES (1), (2), (1), (3); + +SELECT CASE WHEN a=1 THEN 0 END FROM test.test; + +DROP TABLE test.test; From 4d89caf4222de62bf2afcec71c58d80586686ece Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 22 Aug 2018 18:41:21 +0300 Subject: [PATCH 100/192] fix decimal-arithm test (on clang build) CLICKHOUSE-3765 --- dbms/tests/queries/0_stateless/00700_decimal_arithm.sql | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00700_decimal_arithm.sql b/dbms/tests/queries/0_stateless/00700_decimal_arithm.sql index f4d11ea7431..fdb531255bc 100644 --- a/dbms/tests/queries/0_stateless/00700_decimal_arithm.sql +++ b/dbms/tests/queries/0_stateless/00700_decimal_arithm.sql @@ -29,14 +29,16 @@ SELECT e + e, e - e, e * e, e / e FROM test.decimal WHERE e > 0; -- { serverErro SELECT f + f, f - f, f * f, f / f FROM test.decimal WHERE f > 0; -- { serverError 69 } SELECT g + g, g - g, g * g, g / g FROM test.decimal WHERE g > 0; SELECT h + h, h - h, h * h, h / h FROM test.decimal WHERE h > 0; -- { serverError 407 } -SELECT i + i, i - i, i * i, i / i FROM test.decimal WHERE i > 0; -- { serverError 407 } +SELECT 1 LIMIT 0; +--SELECT i + i, i - i, i * i, i / i FROM test.decimal WHERE i > 0; -- { serverError 407 } SELECT j + j, j - j, j * j, j / j FROM test.decimal WHERE j > 0; SELECT a + 21, a - 21, a - 84, a * 21, a * -21, a / 21, a / 84 FROM test.decimal WHERE a = 42; SELECT b + 21, b - 21, b - 84, b * 21, b * -21, b / 21, b / 84 FROM test.decimal WHERE b = 42; SELECT c + 21, c - 21, c - 84, c * 21, c * -21, c / 21, c / 84 FROM test.decimal WHERE c = 42; SELECT e + 21, e - 21, e - 84, e * 21, e * -21, e / 21, e / 84 FROM test.decimal WHERE e > 0; -- { serverError 407 } -SELECT f + 21, f - 21, f - 84, f * 21, f * -21, f / 21, f / 84 FROM test.decimal WHERE f > 0; -- { serverError 407 } +SELECT 1 LIMIT 0; +--SELECT f + 21, f - 21, f - 84, f * 21, f * -21, f / 21, f / 84 FROM test.decimal WHERE f > 0; -- { serverError 407 } SELECT g + 21, g - 21, g - 84, g * 21, g * -21, g / 21, g / 84 FROM test.decimal WHERE g > 0; SELECT h + 21, h - 21, h - 84, h * 21, h * -21, h / 21, h / 84 FROM test.decimal WHERE h > 0; SELECT i + 21, i - 21, i - 84, i * 21, i * -21, i / 21, i / 84 FROM test.decimal WHERE i > 0; @@ -46,7 +48,8 @@ SELECT 21 + a, 21 - a, 84 - a, 21 * a, -21 * a, 21 / a, 84 / a 
FROM test.decimal SELECT 21 + b, 21 - b, 84 - b, 21 * b, -21 * b, 21 / b, 84 / b FROM test.decimal WHERE b = 42; SELECT 21 + c, 21 - c, 84 - c, 21 * c, -21 * c, 21 / c, 84 / c FROM test.decimal WHERE c = 42; SELECT 21 + e, 21 - e, 84 - e, 21 * e, -21 * e, 21 / e, 84 / e FROM test.decimal WHERE e > 0; -- { serverError 407 } -SELECT 21 + f, 21 - f, 84 - f, 21 * f, -21 * f, 21 / f, 84 / f FROM test.decimal WHERE f > 0; -- { serverError 407 } +SELECT 1 LIMIT 0; +--SELECT 21 + f, 21 - f, 84 - f, 21 * f, -21 * f, 21 / f, 84 / f FROM test.decimal WHERE f > 0; -- { serverError 407 } SELECT 21 + g, 21 - g, 84 - g, 21 * g, -21 * g, 21 / g, 84 / g FROM test.decimal WHERE g > 0; SELECT 21 + h, 21 - h, 84 - h, 21 * h, -21 * h FROM test.decimal WHERE h > 0; --overflow 21 / h, 84 / h SELECT 21 + i, 21 - i, 84 - i, 21 * i, -21 * i, 21 / i, 84 / i FROM test.decimal WHERE i > 0; From e2f18da1a93b499712949a1fa8858e13e12ac5c0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 22 Aug 2018 18:42:27 +0300 Subject: [PATCH 101/192] CLICKHOUSE-3878: Add ODBC test --- dbms/src/Common/ODBCBridgeHelper.cpp | 7 +- dbms/tests/integration/helpers/client.py | 15 ++-- dbms/tests/integration/helpers/cluster.py | 88 +++++++++++++++++-- .../test_odbc_interaction/__init__.py | 0 .../integration/test_odbc_interaction/test.py | 35 ++++++++ 5 files changed, 128 insertions(+), 17 deletions(-) create mode 100644 dbms/tests/integration/test_odbc_interaction/__init__.py create mode 100644 dbms/tests/integration/test_odbc_interaction/test.py diff --git a/dbms/src/Common/ODBCBridgeHelper.cpp b/dbms/src/Common/ODBCBridgeHelper.cpp index 785c457062d..ae1087dfd7e 100644 --- a/dbms/src/Common/ODBCBridgeHelper.cpp +++ b/dbms/src/Common/ODBCBridgeHelper.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -32,13 +33,13 @@ ODBCBridgeHelper::ODBCBridgeHelper( void ODBCBridgeHelper::startODBCBridge() const { Poco::Path path{config.getString("application.dir", "")}; - path.setFileName("clickhouse-odbc-bridge"); + path.setFileName("clickhouse"); - if (!path.isFile()) + if (!Poco::File(path).exists()) throw Exception("clickhouse-odbc-bridge is not found", ErrorCodes::EXTERNAL_EXECUTABLE_NOT_FOUND); std::stringstream command; - command << path.toString() << ' '; + command << path.toString() << " odbc-bridge "; command << "--http-port " << config.getUInt("odbc_bridge.port", DEFAULT_PORT) << ' '; command << "--listen-host " << config.getString("odbc_bridge.listen_host", DEFAULT_HOST) << ' '; command << "--http-timeout " << http_timeout.totalMicroseconds() << ' '; diff --git a/dbms/tests/integration/helpers/client.py b/dbms/tests/integration/helpers/client.py index 355f796b22e..3324a9c8ba3 100644 --- a/dbms/tests/integration/helpers/client.py +++ b/dbms/tests/integration/helpers/client.py @@ -11,11 +11,11 @@ class Client: self.command = [command, '--host', self.host, '--port', str(self.port), '--stacktrace'] - def query(self, sql, stdin=None, timeout=None, settings=None, user=None): - return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user).get_answer() + def query(self, sql, stdin=None, timeout=None, settings=None, user=None, ignore_error=False): + return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user, ignore_error=ignore_error).get_answer() - def get_query_request(self, sql, stdin=None, timeout=None, settings=None, user=None): + def get_query_request(self, sql, stdin=None, timeout=None, settings=None, user=None, ignore_error=False): 
command = self.command[:] if stdin is None: @@ -31,7 +31,7 @@ class Client: if user is not None: command += ['--user', user] - return CommandRequest(command, stdin, timeout) + return CommandRequest(command, stdin, timeout, ignore_error) class QueryTimeoutExceedException(Exception): @@ -43,13 +43,14 @@ class QueryRuntimeException(Exception): class CommandRequest: - def __init__(self, command, stdin=None, timeout=None): + def __init__(self, command, stdin=None, timeout=None, ignore_error=False): # Write data to tmp file to avoid PIPEs and execution blocking stdin_file = tempfile.TemporaryFile() stdin_file.write(stdin) stdin_file.seek(0) self.stdout_file = tempfile.TemporaryFile() self.stderr_file = tempfile.TemporaryFile() + self.ignore_error = ignore_error #print " ".join(command) @@ -75,10 +76,10 @@ class CommandRequest: stdout = self.stdout_file.read() stderr = self.stderr_file.read() - if self.timer is not None and not self.process_finished_before_timeout: + if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error: raise QueryTimeoutExceedException('Client timed out!') - if self.process.returncode != 0 or stderr: + if (self.process.returncode != 0 or stderr) and not self.ignore_error: raise QueryRuntimeException('Client failed! Return code: {}, stderr: {}'.format(self.process.returncode, stderr)) return stdout diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 0ca348c2364..3aeca080aab 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -9,6 +9,7 @@ import socket import time import errno from dicttoxml import dicttoxml +import pymysql import xml.dom.minidom from kazoo.client import KazooClient from kazoo.exceptions import KazooException @@ -22,7 +23,6 @@ from .client import Client, CommandRequest HELPERS_DIR = p.dirname(__file__) DEFAULT_ENV_NAME = 'env_file' - def _create_env_file(path, variables, fname=DEFAULT_ENV_NAME): full_path = os.path.join(path, fname) with open(full_path, 'w') as f: @@ -63,12 +63,13 @@ class ClickHouseCluster: self.with_zookeeper = False self.with_mysql = False self.with_kafka = False + self.with_odbc_drivers = False self.docker_client = None self.is_up = False - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, hostname=None, env_variables={}): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, hostname=None, env_variables={}, image="ubuntu:14.04"): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. 
@@ -86,7 +87,8 @@ class ClickHouseCluster: instance = ClickHouseInstance( self, self.base_dir, name, config_dir, main_configs, user_configs, macros, with_zookeeper, - self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path, clickhouse_path_dir, hostname=hostname, env_variables=env_variables) + self.zookeeper_config_path, with_mysql, with_kafka, self.base_configs_dir, self.server_bin_path, + clickhouse_path_dir, with_odbc_drivers, hostname=hostname, env_variables=env_variables, image=image) self.instances[name] = instance self.base_cmd.extend(['--file', instance.docker_compose_path]) @@ -102,6 +104,14 @@ class ClickHouseCluster: self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_mysql.yml')] + if with_odbc_drivers and not self.with_odbc_drivers: + self.with_odbc_drivers = True + if not self.with_mysql: + self.with_mysql = True + self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_mysql.yml')]) + self.base_mysql_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_mysql.yml')] + if with_kafka and not self.with_kafka: self.with_kafka = True self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_kafka.yml')]) @@ -121,6 +131,19 @@ class ClickHouseCluster: handle = self.docker_client.containers.get(docker_id) return handle.attrs['NetworkSettings']['Networks'].values()[0]['IPAddress'] + def wait_mysql_to_start(self, timeout=60): + start = time.time() + while time.time() - start < timeout: + try: + conn = pymysql.connect(user='root', password='clickhouse', host='127.0.0.1', port=3308) + conn.close() + print "Mysql Started" + return + except Exception: + time.sleep(0.5) + + raise Exception("Cannot wait MySQL container") + def start(self, destroy_dirs=True): if self.is_up: @@ -149,6 +172,7 @@ class ClickHouseCluster: if self.with_mysql and self.base_mysql_cmd: subprocess.check_call(self.base_mysql_cmd + ['up', '-d', '--no-recreate']) + self.wait_mysql_to_start() if self.with_kafka and self.base_kafka_cmd: subprocess.check_call(self.base_kafka_cmd + ['up', '-d', '--no-recreate']) @@ -168,7 +192,6 @@ class ClickHouseCluster: instance.client = Client(instance.ip_address, command=self.client_bin_path) - self.is_up = True @@ -212,7 +235,7 @@ DOCKER_COMPOSE_TEMPLATE = ''' version: '2' services: {name}: - image: ubuntu:14.04 + image: {image} hostname: {hostname} user: '{uid}' volumes: @@ -220,6 +243,7 @@ services: - {configs_dir}:/etc/clickhouse-server/ - {db_dir}:/var/lib/clickhouse/ - {logs_dir}:/var/log/clickhouse-server/ + {odbc_ini_path} entrypoint: - /usr/bin/clickhouse - server @@ -233,9 +257,11 @@ services: class ClickHouseInstance: + def __init__( self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros, - with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, clickhouse_path_dir, hostname=None, env_variables={}): + with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, base_configs_dir, server_bin_path, + clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables={}, image="ubuntu:14.04"): self.name = name self.base_cmd = cluster.base_cmd[:] @@ -260,11 +286,17 @@ class ClickHouseInstance: self.path = p.join(self.cluster.instances_dir, name) self.docker_compose_path = p.join(self.path, 'docker_compose.yml') self.env_variables 
= env_variables + if with_odbc_drivers: + self.odbc_ini_path = os.path.dirname(self.docker_compose_path) + "/odbc.ini:/etc/odbc.ini" + self.with_mysql = True + else: + self.odbc_ini_path = "" self.docker_client = None self.ip_address = None self.client = None self.default_timeout = 20.0 # 20 sec + self.image = image # Connects to the instance via clickhouse-client, sends a query (1st argument) and returns the answer def query(self, *args, **kwargs): @@ -340,6 +372,40 @@ class ClickHouseInstance: xml_str = dicttoxml(dictionary, custom_root="yandex", attr_type=False) return xml.dom.minidom.parseString(xml_str).toprettyxml() + @property + def odbc_drivers(self): + if self.odbc_ini_path: + return { + "SQLite3": { + "DSN": "sqlite3_odbc", + "Database" : "/tmp/sqliteodbc", + "Driver": "/usr/lib/x86_64-linux-gnu/odbc/libsqlite3odbc.so", + "Setup": "/usr/lib/x86_64-linux-gnu/odbc/libsqlite3odbc.so", + }, + "MySQL": { + "DSN": "mysql_odbc", + "Driver": "/usr/lib/x86_64-linux-gnu/odbc/libmyodbc.so", + "Database": "clickhouse", + "Uid": "root", + "Pwd": "clickhouse", + "Server": "mysql1", + }, + "PostgreSQL": { + "DSN": "postgresql_odbc", + "Driver": "/usr/lib/x86_64-linux-gnu/odbc/psqlodbca.so", + "Setup": "/usr/lib/x86_64-linux-gnu/odbc/libodbcpsqlS.so", + } + } + else: + return {} + + def _create_odbc_config_file(self): + with open(self.odbc_ini_path.split(':')[0], 'w') as f: + for driver_setup in self.odbc_drivers.values(): + f.write("[{}]\n".format(driver_setup["DSN"])) + for key, value in driver_setup.items(): + if key != "DSN": + f.write(key + "=" + value + "\n") def create_dir(self, destroy_dir=True): """Create the instance directory and all the needed files there.""" @@ -409,8 +475,14 @@ class ClickHouseInstance: env_file = _create_env_file(os.path.dirname(self.docker_compose_path), self.env_variables) + odbc_ini_path = "" + if self.odbc_ini_path: + self._create_odbc_config_file() + odbc_ini_path = '- ' + self.odbc_ini_path + with open(self.docker_compose_path, 'w') as docker_compose: docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format( + image=self.image, name=self.name, hostname=self.hostname, uid=os.getuid(), @@ -420,7 +492,9 @@ class ClickHouseInstance: db_dir=db_dir, logs_dir=logs_dir, depends_on=str(depends_on), - env_file=env_file)) + env_file=env_file, + odbc_ini_path=odbc_ini_path, + )) def destroy_dir(self): diff --git a/dbms/tests/integration/test_odbc_interaction/__init__.py b/dbms/tests/integration/test_odbc_interaction/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py new file mode 100644 index 00000000000..a53f78030e5 --- /dev/null +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -0,0 +1,35 @@ +import time +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__, server_bin_path="/home/alesap/ClickHouse/dbms/programs/clickhouse") +node1 = cluster.add_instance('node1', with_odbc_drivers=True, with_mysql=True, image='alesapin/ubuntu_with_odbc:14.04') + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def test_segfault_doesnt_crash_server(started_cluster): + mysql_setup = node1.odbc_drivers["MySQL"] + # actually, I don't know, what wrong with that connection string, but libmyodbc always falls into segfault + node1.query("select 1 from odbc('DSN={}', 'dual')".format(mysql_setup["DSN"]), 
ignore_error=True) + + # but after segfault server is still available + assert node1.query("select 1") == "1\n" + +def test_simple_select_works(started_cluster): + sqlite_setup = node1.odbc_drivers["SQLite3"] + sqlite_db = sqlite_setup["Database"] + + node1.exec_in_container(["bash", "-c", "echo 'CREATE TABLE t1(x INTEGER PRIMARY KEY ASC, y, z);' | sqlite3 {}".format(sqlite_db)], privileged=True, user='root') + node1.exec_in_container(["bash", "-c", "echo 'INSERT INTO t1 values(1, 2, 3);' | sqlite3 {}".format(sqlite_db)], privileged=True, user='root') + assert node1.query("select * from odbc('DSN={}', '{}')".format(sqlite_setup["DSN"], 't1')) == "1\t2\t3\n" + + From ba0bf141757ccb0475b290c2ece2e391665ee4af Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 22 Aug 2018 18:54:08 +0300 Subject: [PATCH 102/192] CLICKHOUSE-3878: Fix message --- dbms/src/Common/ODBCBridgeHelper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/ODBCBridgeHelper.cpp b/dbms/src/Common/ODBCBridgeHelper.cpp index ae1087dfd7e..f4229d0e568 100644 --- a/dbms/src/Common/ODBCBridgeHelper.cpp +++ b/dbms/src/Common/ODBCBridgeHelper.cpp @@ -36,7 +36,7 @@ void ODBCBridgeHelper::startODBCBridge() const path.setFileName("clickhouse"); if (!Poco::File(path).exists()) - throw Exception("clickhouse-odbc-bridge is not found", ErrorCodes::EXTERNAL_EXECUTABLE_NOT_FOUND); + throw Exception("clickhouse binary is not found", ErrorCodes::EXTERNAL_EXECUTABLE_NOT_FOUND); std::stringstream command; command << path.toString() << " odbc-bridge "; From 4d919759bf646f7ee3cfb9a1afba2cd8099824e3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 22 Aug 2018 18:56:04 +0300 Subject: [PATCH 103/192] CLICKHOUSE-3878: Remove local path --- dbms/tests/integration/test_odbc_interaction/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index a53f78030e5..a53b78965a4 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -3,7 +3,7 @@ import pytest from helpers.cluster import ClickHouseCluster -cluster = ClickHouseCluster(__file__, server_bin_path="/home/alesap/ClickHouse/dbms/programs/clickhouse") +cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', with_odbc_drivers=True, with_mysql=True, image='alesapin/ubuntu_with_odbc:14.04') @pytest.fixture(scope="module") From 8163fd2b214599138ffa8f7e86afe21646e658f1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 22 Aug 2018 19:14:51 +0300 Subject: [PATCH 104/192] CLICKHOUSE-3878: Slightly improve test --- .../integration/test_odbc_interaction/test.py | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py index a53b78965a4..85a59e91fb4 100644 --- a/dbms/tests/integration/test_odbc_interaction/test.py +++ b/dbms/tests/integration/test_odbc_interaction/test.py @@ -1,30 +1,69 @@ import time import pytest +import pymysql.cursors from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', with_odbc_drivers=True, with_mysql=True, image='alesapin/ubuntu_with_odbc:14.04') +create_table_sql_template = """ + CREATE TABLE `clickhouse`.`{}` ( + `id` int(11) NOT NULL, + `name` varchar(50) NOT NULL, + `age` int NOT NULL default 0, + `money` int NOT NULL default 0, + 
PRIMARY KEY (`id`)) ENGINE=InnoDB;
+    """
+
+def get_mysql_conn():
+    conn = pymysql.connect(user='root', password='clickhouse', host='127.0.0.1', port=3308)
+    return conn
+
+def create_mysql_db(conn, name):
+    with conn.cursor() as cursor:
+        cursor.execute(
+            "CREATE DATABASE {} DEFAULT CHARACTER SET 'utf8'".format(name))
+
+def create_mysql_table(conn, table_name):
+    with conn.cursor() as cursor:
+        cursor.execute(create_table_sql_template.format(table_name))
+
 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
+        conn = get_mysql_conn()
+        # create the MySQL database; the tables are created by the individual tests
+        create_mysql_db(conn, 'clickhouse')
+
         yield cluster

     finally:
         cluster.shutdown()

-def test_segfault_doesnt_crash_server(started_cluster):
+def test_mysql_simple_select_works(started_cluster):
     mysql_setup = node1.odbc_drivers["MySQL"]
-    # it is not clear what exactly is wrong with this connection string, but libmyodbc always segfaults on it
-    node1.query("select 1 from odbc('DSN={}', 'dual')".format(mysql_setup["DSN"]), ignore_error=True)

-    # but after segfault server is still available
+    table_name = 'test_insert_select'
+    conn = get_mysql_conn()
+    create_mysql_table(conn, table_name)
+
+    node1.query('''
+CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32) ENGINE = MySQL('mysql1:3306', 'clickhouse', '{}', 'root', 'clickhouse');
+'''.format(table_name, table_name))
+
+    node1.query("INSERT INTO {}(id, name, money) select number, concat('name_', toString(number)), 3 from numbers(100) ".format(table_name))
+
+    # it is not clear what exactly is wrong with this connection string, but libmyodbc always segfaults on it
+    node1.query("SELECT * FROM odbc('DSN={}', '{}')".format(mysql_setup["DSN"], table_name), ignore_error=True)
+
+    # server still works after segfault
     assert node1.query("select 1") == "1\n"

+    conn.close()
+
-def test_simple_select_works(started_cluster):
+def test_sqlite_simple_select_works(started_cluster):
     sqlite_setup = node1.odbc_drivers["SQLite3"]
     sqlite_db = sqlite_setup["Database"]


From a6ca534a376f58036fcc20a348dbe25594399433 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 22 Aug 2018 19:16:12 +0300
Subject: [PATCH 105/192] CLICKHOUSE-3878: Remove empty lines

---
 dbms/tests/integration/test_odbc_interaction/test.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/dbms/tests/integration/test_odbc_interaction/test.py b/dbms/tests/integration/test_odbc_interaction/test.py
index 85a59e91fb4..4958a08c3ff 100644
--- a/dbms/tests/integration/test_odbc_interaction/test.py
+++ b/dbms/tests/integration/test_odbc_interaction/test.py
@@ -70,5 +70,3 @@ def test_sqlite_simple_select_works(started_cluster):
     node1.exec_in_container(["bash", "-c", "echo 'CREATE TABLE t1(x INTEGER PRIMARY KEY ASC, y, z);' | sqlite3 {}".format(sqlite_db)], privileged=True, user='root')
     node1.exec_in_container(["bash", "-c", "echo 'INSERT INTO t1 values(1, 2, 3);' | sqlite3 {}".format(sqlite_db)], privileged=True, user='root')
     assert node1.query("select * from odbc('DSN={}', '{}')".format(sqlite_setup["DSN"], 't1')) == "1\t2\t3\n"
-
-

From 5951fd677d4a3c416238ebd93d97eb6d4fcf93e1 Mon Sep 17 00:00:00 2001
From: proller
Date: Wed, 22 Aug 2018 19:52:24 +0300
Subject: [PATCH 106/192] wip

---
 dbms/tests/clickhouse-test-server | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server
index 79ef37c5e34..3845da2a93f 100755
--- a/dbms/tests/clickhouse-test-server
+++ b/dbms/tests/clickhouse-test-server
@@ -77,7 +77,7 @@ $GDB 
${BIN_DIR}clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- \ $INTERNAL_COMPILER_PARAMS \ > $LOG_DIR/stdout 2>&1 & CH_PID=$! -sleep ${TEST_SERVER_STARTUP_WAIT:=10} +sleep ${TEST_SERVER_STARTUP_WAIT:=5} if [ "$GDB" ]; then # Long symbols read From e7d7479ed081d79377d595da67a68ee7b70ba006 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 22 Aug 2018 19:53:11 +0300 Subject: [PATCH 107/192] fix --- dbms/src/Common/Stopwatch.cpp | 2 +- dbms/src/Common/ThreadProfileEvents.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/Stopwatch.cpp b/dbms/src/Common/Stopwatch.cpp index ef48d910135..0929ce98dc6 100644 --- a/dbms/src/Common/Stopwatch.cpp +++ b/dbms/src/Common/Stopwatch.cpp @@ -7,7 +7,7 @@ StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current() ::rusage rusage; #if defined(__APPLE__) - ::getrusage(RUSAGE_SELF, &rusage); // TODO FIXME! need rusage_thread + //::getrusage(RUSAGE_SELF, &rusage); // TODO FIXME! need rusage_thread #else ::getrusage(RUSAGE_THREAD, &rusage); #endif diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 46107f7b38d..17f92d9a56c 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -91,7 +91,7 @@ struct RUsageCounters { ::rusage rusage; #if defined(__APPLE__) - ::getrusage(RUSAGE_SELF, &rusage); //TODO? + //::getrusage(RUSAGE_SELF, &rusage); //TODO? #else ::getrusage(RUSAGE_THREAD, &rusage); #endif From e40708a1a52bafea32e8e0b3c5db1b351348c195 Mon Sep 17 00:00:00 2001 From: proller Date: Wed, 22 Aug 2018 19:56:00 +0300 Subject: [PATCH 108/192] clean --- dbms/src/Common/Stopwatch.cpp | 4 +--- dbms/src/Common/ThreadProfileEvents.h | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dbms/src/Common/Stopwatch.cpp b/dbms/src/Common/Stopwatch.cpp index 0929ce98dc6..711ab16d3af 100644 --- a/dbms/src/Common/Stopwatch.cpp +++ b/dbms/src/Common/Stopwatch.cpp @@ -6,9 +6,7 @@ StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current() StopwatchRUsage::Timestamp res; ::rusage rusage; -#if defined(__APPLE__) - //::getrusage(RUSAGE_SELF, &rusage); // TODO FIXME! need rusage_thread -#else +#if !defined(__APPLE__) ::getrusage(RUSAGE_THREAD, &rusage); #endif res.user_ns = rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL; diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 17f92d9a56c..523da7d107b 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -90,9 +90,7 @@ struct RUsageCounters static RUsageCounters current(UInt64 real_time_ = getCurrentTimeNanoseconds()) { ::rusage rusage; -#if defined(__APPLE__) - //::getrusage(RUSAGE_SELF, &rusage); //TODO? 
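
These rusage hunks all follow one pattern: query per-thread counters only where RUSAGE_THREAD exists, and leave the structure zeroed elsewhere (the explicit zero-initialization is added by the later "fixes" patch in this series). A minimal self-contained sketch of that pattern, where the helper name is ours and not part of the patch:

    #include <sys/resource.h>
    #include <cstdint>

    /// Per-thread user CPU time in nanoseconds; stays 0 on platforms
    /// such as macOS that provide no RUSAGE_THREAD.
    static uint64_t currentThreadUserTimeNs()
    {
        ::rusage rusage {};
    #if !defined(__APPLE__)
        ::getrusage(RUSAGE_THREAD, &rusage);
    #endif
        return rusage.ru_utime.tv_sec * 1000000000UL + rusage.ru_utime.tv_usec * 1000UL;
    }
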
-#else +#if !defined(__APPLE__) ::getrusage(RUSAGE_THREAD, &rusage); #endif return RUsageCounters(rusage, real_time_); From ee61c87fef692e397dee1289b3956e5aae140603 Mon Sep 17 00:00:00 2001 From: VadimPE Date: Wed, 22 Aug 2018 21:40:08 +0300 Subject: [PATCH 109/192] CLICKHOUSE-3819 fix tests --- .../queries/0_stateless/00688_case_without_else.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00688_case_without_else.reference b/dbms/tests/queries/0_stateless/00688_case_without_else.reference index f0aad9ef7ae..389736de6aa 100644 --- a/dbms/tests/queries/0_stateless/00688_case_without_else.reference +++ b/dbms/tests/queries/0_stateless/00688_case_without_else.reference @@ -1,4 +1,4 @@ 0 -NULL + 0 -NULL + From 4f7b8fdbd95366259feee414e2c7dd52e84305f6 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Wed, 22 Aug 2018 22:23:13 +0300 Subject: [PATCH 110/192] Update 00688_case_without_else.reference --- .../queries/0_stateless/00688_case_without_else.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00688_case_without_else.reference b/dbms/tests/queries/0_stateless/00688_case_without_else.reference index 389736de6aa..cf4557a6874 100644 --- a/dbms/tests/queries/0_stateless/00688_case_without_else.reference +++ b/dbms/tests/queries/0_stateless/00688_case_without_else.reference @@ -1,4 +1,4 @@ 0 - +\N 0 - +\N From a5a2ec6503fc3f949fc51ca7755ee4a8090062b8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 00:00:52 +0300 Subject: [PATCH 111/192] Fixed bad logic #2482 --- debian/clickhouse-server.init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 85cf9c9e72a..780b51ea46f 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -165,7 +165,7 @@ start() mkdir -p $CLICKHOUSE_PIDDIR chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR # Set net_admin capabilities to support ClickHouse better introspection - su -s setcap cap_net_admin=+ep "$BINDIR/$PROGRAM" + setcap cap_net_admin=+ep "$BINDIR/$GENERIC_PROGRAM" initdb if ! is_running; then # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. From 0c129ab7f43b8ef9a175b4b33a33380fdad17a17 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 02:54:33 +0300 Subject: [PATCH 112/192] More descriptive error message in test [#CLICKHOUSE-2] --- dbms/tests/queries/0_stateless/00415_into_outfile.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00415_into_outfile.sh b/dbms/tests/queries/0_stateless/00415_into_outfile.sh index ec7cef9dbba..ce934f915a1 100755 --- a/dbms/tests/queries/0_stateless/00415_into_outfile.sh +++ b/dbms/tests/queries/0_stateless/00415_into_outfile.sh @@ -28,10 +28,11 @@ perform "describe_table" "DESCRIBE TABLE system.one INTO OUTFILE '${CLICKHOUSE_T echo "performing test: clickhouse-local" echo -e '1\t2' | ${CLICKHOUSE_LOCAL} -s --structure 'col1 UInt32, col2 UInt32' --query "SELECT col1 + 1, col2 + 1 FROM table INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_clickhouse-local.out'" -if [ "$?" -eq 0 ]; then +err=$? 
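+# NB: $? has to be read on the very next line after the pipeline; any command
+# placed in between (even an echo) would overwrite the exit status checked below.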
+if [ "$err" -eq 0 ]; then cat "${CLICKHOUSE_TMP}/test_into_outfile_clickhouse-local.out" else - echo "query failed" + echo "query failed with exit code $err" fi rm -f "${CLICKHOUSE_TMP}/test_into_outfile_clickhouse-local.out" From 61b8b60f533a88d4a08c5ab021f5191835272b2c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 03:12:24 +0300 Subject: [PATCH 113/192] Whitespaces [#CLICKHOUSE-3878] --- dbms/src/Common/ODBCBridgeHelper.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dbms/src/Common/ODBCBridgeHelper.cpp b/dbms/src/Common/ODBCBridgeHelper.cpp index f4229d0e568..62022e2b444 100644 --- a/dbms/src/Common/ODBCBridgeHelper.cpp +++ b/dbms/src/Common/ODBCBridgeHelper.cpp @@ -14,10 +14,12 @@ namespace DB { + namespace ErrorCodes { extern const int EXTERNAL_SERVER_IS_NOT_RESPONDING; } + ODBCBridgeHelper::ODBCBridgeHelper( const Configuration & config_, const Poco::Timespan & http_timeout_, const std::string & connection_string_) : config(config_), http_timeout(http_timeout_), connection_string(connection_string_) @@ -30,6 +32,7 @@ ODBCBridgeHelper::ODBCBridgeHelper( ping_url.setScheme("http"); ping_url.setPath(PING_HANDLER); } + void ODBCBridgeHelper::startODBCBridge() const { Poco::Path path{config.getString("application.dir", "")}; From 61f3c4f0d1c6136c09f187b279c728c500b6bb32 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 03:14:26 +0300 Subject: [PATCH 114/192] Removed useless option --silent from the clickhouse-local, because it was implemented not according to the specs and the options --silent, --verbose, --echo was messed up #1773 --- dbms/programs/local/LocalServer.cpp | 19 ++++----- ...5_storage_file_and_clickhouse-local_app.sh | 2 +- .../0_stateless/00407_parsing_nulls.sh | 40 +++++++++---------- .../queries/0_stateless/00415_into_outfile.sh | 2 +- .../00512_fractional_time_zones.sh | 6 +-- 5 files changed, 32 insertions(+), 37 deletions(-) diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 4528ad40128..4b8d43a8093 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -104,8 +104,8 @@ try if (!config().has("query") && !config().has("table-structure")) /// Nothing to process { - if (!config().hasOption("silent")) - std::cerr << "There are no queries to process." << std::endl; + if (config().hasOption("verbose")) + std::cerr << "There are no queries to process." << '\n'; return Application::EXIT_OK; } @@ -200,8 +200,7 @@ try } catch (const Exception & e) { - if (!config().hasOption("silent")) - std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")); + std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; /// If exception code isn't zero, we should return non-zero return code anyway. return e.code() ? e.code() : -1; @@ -283,7 +282,7 @@ void LocalServer::processQueries() WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); if (echo_query) - std::cerr << query << "\n"; + std::cerr << query << '\n'; try { @@ -297,8 +296,7 @@ void LocalServer::processQueries() if (!exception) exception = std::current_exception(); - if (!config().has("silent")) - std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")); + std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; } } @@ -360,7 +358,7 @@ void LocalServer::setupUsers() static void showClientVersion() { - std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." 
<< std::endl; + std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << '\n'; } std::string LocalServer::getHelpHeader() const @@ -421,7 +419,6 @@ void LocalServer::init(int argc, char ** argv) ("format,f", po::value(), "default output format (clickhouse-client compatibility)") ("output-format", po::value(), "default output format") - ("silent,s", "quiet mode, do not print errors") ("stacktrace", "print stack traces of exceptions") ("echo", "print query before execution") ("verbose", "print query and other debugging info") @@ -477,8 +474,6 @@ void LocalServer::init(int argc, char ** argv) if (options.count("output-format")) config().setString("output-format", options["output-format"].as()); - if (options.count("silent")) - config().setBool("silent", true); if (options.count("stacktrace")) config().setBool("stacktrace", true); if (options.count("echo")) @@ -507,7 +502,7 @@ int mainEntryClickHouseLocal(int argc, char ** argv) } catch (...) { - std::cerr << DB::getCurrentExceptionMessage(true) << "\n"; + std::cerr << DB::getCurrentExceptionMessage(true) << '\n'; auto code = DB::getCurrentExceptionCode(); return code ? code : 1; } diff --git a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh index 98acb2f949d..744242e721a 100755 --- a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh +++ b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh @@ -44,7 +44,7 @@ ${CLICKHOUSE_LOCAL} -q "SET max_rows_in_distinct=33; SELECT name, value FROM sys ${CLICKHOUSE_LOCAL} --max_bytes_before_external_group_by=1 --max_block_size=10 -q "SELECT sum(ignore(*)) FROM (SELECT number, count() FROM numbers(1000) GROUP BY number)" echo # Check exta options -(${CLICKHOUSE_LOCAL} --ignore-error --echo --silent -q "SELECT nothing_to_do();SELECT 42;" 2>&1 && echo "Wrong RC") || true +(${CLICKHOUSE_LOCAL} --ignore-error --echo -q "SELECT nothing_to_do();SELECT 42;" 2>&1 && echo "Wrong RC") || true echo ${CLICKHOUSE_LOCAL} -q "CREATE TABLE sophisticated_default ( diff --git a/dbms/tests/queries/0_stateless/00407_parsing_nulls.sh b/dbms/tests/queries/0_stateless/00407_parsing_nulls.sh index f4bd51e2a68..3d06307beb4 100755 --- a/dbms/tests/queries/0_stateless/00407_parsing_nulls.sh +++ b/dbms/tests/queries/0_stateless/00407_parsing_nulls.sh @@ -5,26 +5,26 @@ set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} -s --input-format=TabSeparated --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} -s --input-format=JSONEachRow --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} -s --input-format=Values --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} --input-format=TabSeparated --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} --input-format=JSONEachRow --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} --input-format=Values --output-format=TabSeparated --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} -s --input-format=TabSeparated --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} -s --input-format=JSONEachRow --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | 
${CLICKHOUSE_LOCAL} -s --input-format=Values --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} --input-format=TabSeparated --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} --input-format=JSONEachRow --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} --input-format=Values --output-format=CSV --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} -s --input-format=TabSeparated --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} -s --input-format=JSONEachRow --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} -s --input-format=Values --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} --input-format=TabSeparated --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} --input-format=JSONEachRow --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' 
--query="SELECT * FROM table" +echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} --input-format=Values --output-format=JSONEachRow --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} -s --input-format=TabSeparated --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} -s --input-format=CSV --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} -s --input-format=JSONEachRow --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" -echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} -s --input-format=Values --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '\\tHello\t123\t\\N\n\\N\t\t2000-01-01 00:00:00\n' | ${CLICKHOUSE_LOCAL} --input-format=TabSeparated --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne 'Hello,123,\\N\n\\N,0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '"\\Hello",123,\\N\n"\\N",0,"2000-01-01 00:00:00"' | ${CLICKHOUSE_LOCAL} --input-format=CSV --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo -ne '{"s" : null, "x" : 123}, {"s" : "\N", "t":"2000-01-01 00:00:00"}' | ${CLICKHOUSE_LOCAL} --input-format=JSONEachRow --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" +echo "(NULL, 111, '2000-01-01 00:00:00'), ('\N', NULL, NULL), ('a\Nb', NULL, NULL)" | ${CLICKHOUSE_LOCAL} --input-format=Values --output-format=Values --structure='s Nullable(String), x Nullable(UInt64), t Nullable(DateTime)' --query="SELECT * FROM table" diff --git a/dbms/tests/queries/0_stateless/00415_into_outfile.sh b/dbms/tests/queries/0_stateless/00415_into_outfile.sh index ce934f915a1..a0b07b6831c 100755 --- a/dbms/tests/queries/0_stateless/00415_into_outfile.sh +++ b/dbms/tests/queries/0_stateless/00415_into_outfile.sh @@ -27,7 +27,7 @@ perform "bad_union_all" "SELECT 1, 2 INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_o perform "describe_table" "DESCRIBE TABLE system.one INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_describe_table.out'" echo "performing test: clickhouse-local" -echo -e '1\t2' | ${CLICKHOUSE_LOCAL} -s --structure 'col1 UInt32, col2 UInt32' --query "SELECT col1 + 1, col2 + 1 FROM table INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_clickhouse-local.out'" +echo -e '1\t2' | ${CLICKHOUSE_LOCAL} --structure 'col1 UInt32, col2 UInt32' --query "SELECT col1 + 
1, col2 + 1 FROM table INTO OUTFILE '${CLICKHOUSE_TMP}/test_into_outfile_clickhouse-local.out'"
 err=$?
 if [ "$err" -eq 0 ]; then
     cat "${CLICKHOUSE_TMP}/test_into_outfile_clickhouse-local.out"
 else
diff --git a/dbms/tests/queries/0_stateless/00512_fractional_time_zones.sh b/dbms/tests/queries/0_stateless/00512_fractional_time_zones.sh
index 2ac93a796c4..0f0386e4c34 100755
--- a/dbms/tests/queries/0_stateless/00512_fractional_time_zones.sh
+++ b/dbms/tests/queries/0_stateless/00512_fractional_time_zones.sh
@@ -3,6 +3,6 @@
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . $CURDIR/../shell_config.sh

-TZ=Europe/Moscow ${CLICKHOUSE_LOCAL} -s --query="SELECT toDateTime('1990-10-19 00:00:00')"
-TZ=Asia/Colombo ${CLICKHOUSE_LOCAL} -s --query="SELECT toDateTime('1990-10-19 00:00:00')"
-TZ=Asia/Kathmandu ${CLICKHOUSE_LOCAL} -s --query="SELECT toDateTime('1990-10-19 00:00:00')"
+TZ=Europe/Moscow ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')"
+TZ=Asia/Colombo ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')"
+TZ=Asia/Kathmandu ${CLICKHOUSE_LOCAL} --query="SELECT toDateTime('1990-10-19 00:00:00')"

From 18857ed42825073679681684af81f0008d498b4f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 23 Aug 2018 03:19:41 +0300
Subject: [PATCH 115/192] Increased padding for
 "00614_shard_same_header_for_local_and_remote_node_in_distributed_query"
 [#CLICKHOUSE-2]

---
 dbms/tests/clickhouse-test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test
index 47f3d93f32e..5cd4985b3c0 100755
--- a/dbms/tests/clickhouse-test
+++ b/dbms/tests/clickhouse-test
@@ -163,7 +163,7 @@ def main(args):
         report_testcase = et.Element("testcase", attrib = {"name": name})

         try:
-            print "{0:70}".format(name + ": "),
+            print "{0:72}".format(name + ": "),
             sys.stdout.flush()

             if args.skip and any(s in name for s in args.skip):

From a2674d46fd87095f47f5d69b983fe126b34f58ff Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 23 Aug 2018 03:49:18 +0300
Subject: [PATCH 116/192] Fixed wrong code #2887

---
 dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp b/dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp
index e5cf72590f0..ebdffd493df 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp
+++ b/dbms/src/AggregateFunctions/AggregateFunctionRetention.cpp
@@ -14,7 +14,10 @@ AggregateFunctionPtr createAggregateFunctionRetention(const std::string & name,
 {
     assertNoParameters(name, params);

-    if (arguments.size() > AggregateFunctionRetentionData::max_events )
+    if (arguments.size() < 2)
+        throw Exception("Not enough event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    if (arguments.size() > AggregateFunctionRetentionData::max_events)
         throw Exception("Too many event arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

     return std::make_shared<AggregateFunctionRetention>(arguments);

From d372430193b63ddd6466e89e661d10057f6b9f82 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 23 Aug 2018 04:01:25 +0300
Subject: [PATCH 117/192] Split long running tests [#CLICKHOUSE-2]

---
 .../00534_long_functions_bad_arguments10.reference | 1 +
 .../0_stateless/00534_long_functions_bad_arguments10.sh | 8 ++++++++
 .../00534_long_functions_bad_arguments11.reference | 1 +
 .../0_stateless/00534_long_functions_bad_arguments11.sh 
| 8 ++++++++ .../00534_long_functions_bad_arguments12.reference | 1 + .../0_stateless/00534_long_functions_bad_arguments12.sh | 8 ++++++++ .../00534_long_functions_bad_arguments13.reference | 1 + .../0_stateless/00534_long_functions_bad_arguments13.sh | 8 ++++++++ .../00534_long_functions_bad_arguments8.reference | 2 -- .../0_stateless/00534_long_functions_bad_arguments8.sh | 2 -- .../00534_long_functions_bad_arguments9.reference | 2 -- .../0_stateless/00534_long_functions_bad_arguments9.sh | 4 +--- 12 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.reference create mode 100755 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.sh create mode 100644 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.reference create mode 100755 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.sh create mode 100644 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.reference create mode 100755 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.sh create mode 100644 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.reference create mode 100755 dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.sh diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.reference b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.reference new file mode 100644 index 00000000000..7193c3d3f3d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.reference @@ -0,0 +1 @@ +Still alive diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.sh b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.sh new file mode 100755 index 00000000000..83e6718e8fb --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments10.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. $CURDIR/00534_long_functions_bad_arguments.lib + +test_variant 'SELECT $_([NULL],[NULL]);' diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.reference b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.reference new file mode 100644 index 00000000000..7193c3d3f3d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.reference @@ -0,0 +1 @@ +Still alive diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.sh b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.sh new file mode 100755 index 00000000000..9f18e07dfe7 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments11.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. 
$CURDIR/00534_long_functions_bad_arguments.lib + +test_variant 'SELECT $_(NULL, NULL, NULL);' diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.reference b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.reference new file mode 100644 index 00000000000..7193c3d3f3d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.reference @@ -0,0 +1 @@ +Still alive diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.sh b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.sh new file mode 100755 index 00000000000..e480ad631fe --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments12.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. $CURDIR/00534_long_functions_bad_arguments.lib + +test_variant 'SELECT $_([], [], []);' diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.reference b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.reference new file mode 100644 index 00000000000..7193c3d3f3d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.reference @@ -0,0 +1 @@ +Still alive diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.sh b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.sh new file mode 100755 index 00000000000..c92851a6cfd --- /dev/null +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments13.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. $CURDIR/00534_long_functions_bad_arguments.lib + +test_variant 'SELECT $_([NULL], [NULL], [NULL]);' diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.reference b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.reference index 0d72c2d7fe0..7193c3d3f3d 100644 --- a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.reference +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.reference @@ -1,3 +1 @@ Still alive -Still alive -Still alive diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.sh b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.sh index f884a926c3b..28f2d55b47a 100755 --- a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.sh +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments8.sh @@ -6,5 +6,3 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/00534_long_functions_bad_arguments.lib test_variant 'SELECT $_(NULL, NULL);' -test_variant 'SELECT $_([], []);' -test_variant 'SELECT $_([NULL],[NULL]);' diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.reference b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.reference index 0d72c2d7fe0..7193c3d3f3d 100644 --- a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.reference +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.reference @@ -1,3 +1 @@ Still alive -Still alive -Still alive diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.sh b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.sh index bc1ceda33fa..a22f399bfac 100755 --- a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.sh +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments9.sh @@ -5,6 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/00534_long_functions_bad_arguments.lib -test_variant 'SELECT $_(NULL, NULL, NULL);' -test_variant 'SELECT $_([], [], []);' -test_variant 'SELECT $_([NULL], [NULL], [NULL]);' +test_variant 'SELECT $_([], []);' From d95e2be34441bdbff2eab1a15819daaf994d2922 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 04:05:04 +0300 Subject: [PATCH 118/192] Updated test #2887 --- .../queries/0_stateless/00688_aggregation_retention.reference | 1 - dbms/tests/queries/0_stateless/00688_aggregation_retention.sql | 1 - 2 files changed, 2 deletions(-) diff --git a/dbms/tests/queries/0_stateless/00688_aggregation_retention.reference b/dbms/tests/queries/0_stateless/00688_aggregation_retention.reference index 95a6127a23f..e0dce2baba7 100644 --- a/dbms/tests/queries/0_stateless/00688_aggregation_retention.reference +++ b/dbms/tests/queries/0_stateless/00688_aggregation_retention.reference @@ -1,4 +1,3 @@ -80 80 50 80 60 80 50 60 diff --git a/dbms/tests/queries/0_stateless/00688_aggregation_retention.sql b/dbms/tests/queries/0_stateless/00688_aggregation_retention.sql index 578f91833d0..45f62cb05d9 100644 --- a/dbms/tests/queries/0_stateless/00688_aggregation_retention.sql +++ b/dbms/tests/queries/0_stateless/00688_aggregation_retention.sql @@ -5,7 +5,6 @@ INSERT INTO retention_test SELECT '2018-08-06', number FROM numbers(80); INSERT INTO retention_test SELECT '2018-08-07', number FROM numbers(50); INSERT INTO retention_test SELECT '2018-08-08', number FROM numbers(60); -SELECT sum(r[1]) as r1 FROM (SELECT uid, retention(date = '2018-08-06') AS r FROM retention_test WHERE date IN ('2018-08-06') GROUP BY uid); SELECT sum(r[1]) as r1, sum(r[2]) as r2 FROM (SELECT uid, retention(date = '2018-08-06', date = '2018-08-07') AS r FROM retention_test WHERE date IN ('2018-08-06', '2018-08-07') GROUP BY uid); SELECT sum(r[1]) as r1, sum(r[2]) as r2 FROM (SELECT uid, retention(date = '2018-08-06', date = '2018-08-08') AS r FROM retention_test WHERE date IN ('2018-08-06', '2018-08-08') GROUP BY uid); SELECT sum(r[1]) as r1, sum(r[2]) as r2, sum(r[3]) as r3 FROM (SELECT uid, retention(date = '2018-08-06', date = '2018-08-07', date = '2018-08-08') AS r FROM retention_test WHERE date IN ('2018-08-06', '2018-08-07', '2018-08-08') GROUP BY uid); From 246f1947c51fca9dea9e2e92d211c31675e33c5c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 04:06:21 +0300 Subject: [PATCH 119/192] Improvement #2887 --- .../AggregateFunctionRetention.h | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 
deletions(-)

diff --git a/dbms/src/AggregateFunctions/AggregateFunctionRetention.h b/dbms/src/AggregateFunctions/AggregateFunctionRetention.h
index be5e0810d13..c48e81a4f52 100644
--- a/dbms/src/AggregateFunctions/AggregateFunctionRetention.h
+++ b/dbms/src/AggregateFunctions/AggregateFunctionRetention.h
@@ -126,19 +126,23 @@ public:

     void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
     {
-        auto & data_to = static_cast<ColumnArray &>(to).getData();
+        auto & data_to = static_cast<ColumnUInt8 &>(static_cast<ColumnArray &>(to).getData()).getData();
         auto & offsets_to = static_cast<ColumnArray &>(to).getOffsets();

+        ColumnArray::Offset current_offset = data_to.size();
+        data_to.resize(current_offset + events_size);
+
         const bool first_flag = this->data(place).events.test(0);
-        data_to.insert(first_flag ? Field(static_cast<UInt8>(1)) : Field(static_cast<UInt8>(0)));
-        for (const auto i : ext::range(1, events_size))
+        data_to[current_offset] = first_flag;
+        ++current_offset;
+
+        for (size_t i = 1; i < events_size; ++i)
         {
-            if (first_flag && this->data(place).events.test(i))
-                data_to.insert(Field(static_cast<UInt8>(1)));
-            else
-                data_to.insert(Field(static_cast<UInt8>(0)));
+            data_to[current_offset] = (first_flag && this->data(place).events.test(i));
+            ++current_offset;
         }
-        offsets_to.push_back(offsets_to.size() == 0 ? events_size : offsets_to.back() + events_size);
+
+        offsets_to.push_back(current_offset);
     }

     const char * getHeaderFilePath() const override

From b9778a3764cb1816e401bfcf323e1ec02468db6d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 23 Aug 2018 04:31:28 +0300
Subject: [PATCH 120/192] Speed up fuzz tests [#CLICKHOUSE-2]

---
 dbms/src/Interpreters/Settings.h       |  1 +
 dbms/src/Interpreters/executeQuery.cpp | 13 ++++++++-----
 .../00534_long_functions_bad_arguments.lib    |  2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h
index e1e02af96e4..342725b3902 100644
--- a/dbms/src/Interpreters/Settings.h
+++ b/dbms/src/Interpreters/Settings.h
@@ -283,6 +283,7 @@ struct Settings
     M(SettingUInt64, max_fetch_partition_retries_count, 5, "Amount of retries while fetching partition from another host.") \
     M(SettingBool, asterisk_left_columns_only, 0, "If it is set to true, the asterisk only return left of join query.") \
     M(SettingUInt64, http_max_multipart_form_data_size, 1024 * 1024 * 1024, "Limit on size of multipart/form-data content. This setting cannot be parsed from URL parameters and should be set in user profile. Note that content is parsed and external tables are created in memory before start of query execution. And this is the only limit that has effect on that stage (limits on max memory usage and max execution time have no effect while reading HTTP form data).") \
+    M(SettingBool, calculate_text_stack_trace, 1, "Calculate text stack trace in case of exceptions during query execution. This is the default. It requires symbol lookups that may slow down fuzzing tests when a huge number of invalid queries is executed. In normal cases you should not disable this option.") \


 #define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \
diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp
index 8de5fa6737c..80c021e6f29 100644
--- a/dbms/src/Interpreters/executeQuery.cpp
+++ b/dbms/src/Interpreters/executeQuery.cpp
@@ -45,6 +45,7 @@ static void checkASTSizeLimits(const IAST & ast, const Settings & settings)
 }

+/// NOTE This is wrong in case of single-line comments and in case of multiline string literals.
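+/// For example, once the newline is replaced, a trailing "-- comment" on one line
+/// will comment out whatever followed it on the next line, and a newline inside a
+/// multiline string literal is rewritten as well.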
static String joinLines(const String & query) { String res = query; @@ -99,7 +100,7 @@ static void onExceptionBeforeStart(const String & query, Context & context, time /// Exception before the query execution. context.getQuota().addError(); - bool log_queries = context.getSettingsRef().log_queries; + const Settings & settings = context.getSettingsRef(); /// Log the start of query execution into the table if necessary. QueryLogElement elem; @@ -109,18 +110,19 @@ static void onExceptionBeforeStart(const String & query, Context & context, time elem.event_time = current_time; elem.query_start_time = current_time; - elem.query = query.substr(0, context.getSettingsRef().log_queries_cut_to_length); + elem.query = query.substr(0, settings.log_queries_cut_to_length); elem.exception = getCurrentExceptionMessage(false); elem.client_info = context.getClientInfo(); - setExceptionStackTrace(elem); + if (settings.calculate_text_stack_trace) + setExceptionStackTrace(elem); logException(context, elem); /// Update performance counters before logging to query_log CurrentThread::finalizePerformanceCounters(); - if (log_queries) + if (settings.log_queries) if (auto query_log = context.getQueryLog()) query_log->add(elem); } @@ -363,7 +365,8 @@ static std::tuple executeQueryImpl( elem.profile_counters = std::move(info.profile_counters); } - setExceptionStackTrace(elem); + if (settings.calculate_text_stack_trace) + setExceptionStackTrace(elem); logException(context, elem); /// In case of exception we log internal queries also diff --git a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments.lib b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments.lib index 892581ab298..8ae2e929224 100755 --- a/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments.lib +++ b/dbms/tests/queries/0_stateless/00534_long_functions_bad_arguments.lib @@ -1,6 +1,6 @@ #!/usr/bin/env bash function test_variant { - perl -E "say \$_ for map {chomp; (qq{$1})} qx{$CLICKHOUSE_CLIENT -q 'SELECT name FROM system.functions ORDER BY name;'}" | $CLICKHOUSE_CLIENT -n --ignore-error >/dev/null 2>&1 + perl -E "say \$_ for map {chomp; (qq{$1})} qx{$CLICKHOUSE_CLIENT --calculate_text_stack_trace=0 -q 'SELECT name FROM system.functions ORDER BY name;'}" | $CLICKHOUSE_CLIENT -n --ignore-error >/dev/null 2>&1 $CLICKHOUSE_CLIENT -q "SELECT 'Still alive'" } From 0562ab69e825d293f35272f26f887f9c2c88f5e6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 04:35:19 +0300 Subject: [PATCH 121/192] Fixed inconsistency between clickhouse-client and clickhouse-local [#CLICKHOUSE-2] --- dbms/programs/local/LocalServer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 4b8d43a8093..775916bc2b1 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -273,7 +273,7 @@ void LocalServer::processQueries() /// Use the same query_id (and thread group) for all queries CurrentThread::QueryScope query_scope_holder(*context); - bool echo_query = config().hasOption("echo") || config().hasOption("verbose"); + bool echo_queries = config().hasOption("echo") || config().hasOption("verbose"); std::exception_ptr exception; for (const auto & query : queries) @@ -281,8 +281,8 @@ void LocalServer::processQueries() ReadBufferFromString read_buf(query); WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); - if (echo_query) - std::cerr << query << '\n'; + if (echo_queries) + std::cout << query 
<< '\n'; try { From 814d2273212a9fb98d34ecf8b1195843a3e9bc0b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 04:35:46 +0300 Subject: [PATCH 122/192] Fixed test [#CLICKHOUSE-2] --- .../0_stateless/00385_storage_file_and_clickhouse-local_app.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh index 744242e721a..69771ea9247 100755 --- a/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh +++ b/dbms/tests/queries/0_stateless/00385_storage_file_and_clickhouse-local_app.sh @@ -44,7 +44,7 @@ ${CLICKHOUSE_LOCAL} -q "SET max_rows_in_distinct=33; SELECT name, value FROM sys ${CLICKHOUSE_LOCAL} --max_bytes_before_external_group_by=1 --max_block_size=10 -q "SELECT sum(ignore(*)) FROM (SELECT number, count() FROM numbers(1000) GROUP BY number)" echo # Check exta options -(${CLICKHOUSE_LOCAL} --ignore-error --echo -q "SELECT nothing_to_do();SELECT 42;" 2>&1 && echo "Wrong RC") || true +(${CLICKHOUSE_LOCAL} --ignore-error --echo -q "SELECT nothing_to_do();SELECT 42;" 2>/dev/null && echo "Wrong RC") || true echo ${CLICKHOUSE_LOCAL} -q "CREATE TABLE sophisticated_default ( From 451c6cd304539a30a6c3e067e2826fef3e9b6c54 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 23 Aug 2018 04:42:55 +0300 Subject: [PATCH 123/192] Removed a notice produced by gcc during build [#CLICKHOUSE-2] --- dbms/src/Storages/System/StorageSystemSettings.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Storages/System/StorageSystemSettings.cpp b/dbms/src/Storages/System/StorageSystemSettings.cpp index fee9467f6f9..09790ac2fcb 100644 --- a/dbms/src/Storages/System/StorageSystemSettings.cpp +++ b/dbms/src/Storages/System/StorageSystemSettings.cpp @@ -17,6 +17,10 @@ NamesAndTypesList StorageSystemSettings::getNamesAndTypes() }; } +#ifndef __clang__ +#pragma GCC optimize("-fno-var-tracking-assignments") +#endif + void StorageSystemSettings::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const { const Settings & settings = context.getSettingsRef(); From f592aa84d1a2f6c87f9f10494947d9f7f3433881 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 23 Aug 2018 15:04:30 +0300 Subject: [PATCH 124/192] fix vertical merge for empty parts [#CLICKHOUSE-3913] --- dbms/src/DataStreams/ColumnGathererStream.cpp | 28 ++++--------------- dbms/src/DataStreams/ColumnGathererStream.h | 12 ++------ .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 ++-- 3 files changed, 12 insertions(+), 34 deletions(-) diff --git a/dbms/src/DataStreams/ColumnGathererStream.cpp b/dbms/src/DataStreams/ColumnGathererStream.cpp index 6aa8fcd8027..cca71876fb6 100644 --- a/dbms/src/DataStreams/ColumnGathererStream.cpp +++ b/dbms/src/DataStreams/ColumnGathererStream.cpp @@ -21,43 +21,31 @@ namespace ErrorCodes ColumnGathererStream::ColumnGathererStream( const String & column_name_, const BlockInputStreams & source_streams, ReadBuffer & row_sources_buf_, size_t block_preferred_size_) - : column_name(column_name_), row_sources_buf(row_sources_buf_) + : column_name(column_name_), sources(source_streams.size()), row_sources_buf(row_sources_buf_) , block_preferred_size(block_preferred_size_), log(&Logger::get("ColumnGathererStream")) { if (source_streams.empty()) throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED); children.assign(source_streams.begin(), 
source_streams.end()); -} - -void ColumnGathererStream::init() -{ - sources.reserve(children.size()); for (size_t i = 0; i < children.size(); ++i) { - sources.emplace_back(children[i]->read(), column_name); - - Block & block = sources.back().block; + const Block & header = children[i]->getHeader(); /// Sometimes MergeTreeReader injects additional column with partitioning key - if (block.columns() > 2) + if (header.columns() > 2) throw Exception( - "Block should have 1 or 2 columns, but contains " + toString(block.columns()), + "Block should have 1 or 2 columns, but contains " + toString(header.columns()), ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); - if (!block.has(column_name)) - throw Exception( - "Not found column '" + column_name + "' in block.", - ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); if (i == 0) { column.name = column_name; - column.type = block.getByName(column_name).type; + column.type = header.getByName(column_name).type; column.column = column.type->createColumn(); } - - if (block.getByName(column_name).column->getName() != column.column->getName()) + else if (header.getByName(column_name).column->getName() != column.column->getName()) throw Exception("Column types don't match", ErrorCodes::INCOMPATIBLE_COLUMNS); } } @@ -69,10 +57,6 @@ Block ColumnGathererStream::readImpl() if (children.size() == 1 && row_sources_buf.eof()) return children[0]->read(); - /// Initialize first source blocks - if (sources.empty()) - init(); - if (!source_to_fully_copy && row_sources_buf.eof()) return Block(); diff --git a/dbms/src/DataStreams/ColumnGathererStream.h b/dbms/src/DataStreams/ColumnGathererStream.h index 0a64df05cd6..f52e75d2b30 100644 --- a/dbms/src/DataStreams/ColumnGathererStream.h +++ b/dbms/src/DataStreams/ColumnGathererStream.h @@ -76,16 +76,11 @@ private: /// Cache required fields struct Source { - const IColumn * column; - size_t pos; - size_t size; + const IColumn * column = nullptr; + size_t pos = 0; + size_t size = 0; Block block; - Source(Block && block_, const String & name) : block(std::move(block_)) - { - update(name); - } - void update(const String & name) { column = block.getByName(name).column.get(); @@ -94,7 +89,6 @@ private: } }; - void init(); void fetchNewBlock(Source & source, size_t source_num); String column_name; diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 23a0d3d1943..dc4b97ff6e3 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1027,7 +1027,7 @@ MergeTreeDataMergerMutator::MergeAlgorithm MergeTreeDataMergerMutator::chooseMer { if (deduplicate) return MergeAlgorithm::Horizontal; - if (data.context.getMergeTreeSettings().enable_vertical_merge_algorithm == 0) + if (data.settings.enable_vertical_merge_algorithm == 0) return MergeAlgorithm::Horizontal; bool is_supported_storage = @@ -1036,9 +1036,9 @@ MergeTreeDataMergerMutator::MergeAlgorithm MergeTreeDataMergerMutator::chooseMer data.merging_params.mode == MergeTreeData::MergingParams::Replacing || data.merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing; - bool enough_ordinary_cols = gathering_columns.size() >= data.context.getMergeTreeSettings().vertical_merge_algorithm_min_columns_to_activate; + bool enough_ordinary_cols = gathering_columns.size() >= data.settings.vertical_merge_algorithm_min_columns_to_activate; - bool enough_total_rows = sum_rows_upper_bound >= 
data.context.getMergeTreeSettings().vertical_merge_algorithm_min_rows_to_activate; + bool enough_total_rows = sum_rows_upper_bound >= data.settings.vertical_merge_algorithm_min_rows_to_activate; bool no_parts_overflow = parts.size() <= RowSourcePart::MAX_PARTS; From cc2f10c1cc546029a31f99f7634ddc50bd357219 Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 23 Aug 2018 15:15:30 +0300 Subject: [PATCH 125/192] add test for vertical merge of empty parts [#CLICKHOUSE-3913] --- .../00682_empty_parts_merge.reference | 2 ++ .../0_stateless/00682_empty_parts_merge.sh | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/dbms/tests/queries/0_stateless/00682_empty_parts_merge.reference b/dbms/tests/queries/0_stateless/00682_empty_parts_merge.reference index e69de29bb2d..267e6f2d162 100644 --- a/dbms/tests/queries/0_stateless/00682_empty_parts_merge.reference +++ b/dbms/tests/queries/0_stateless/00682_empty_parts_merge.reference @@ -0,0 +1,2 @@ +*** Vertical merge *** +2 2 diff --git a/dbms/tests/queries/0_stateless/00682_empty_parts_merge.sh b/dbms/tests/queries/0_stateless/00682_empty_parts_merge.sh index abc64ed06f7..9cbdc68c1b1 100755 --- a/dbms/tests/queries/0_stateless/00682_empty_parts_merge.sh +++ b/dbms/tests/queries/0_stateless/00682_empty_parts_merge.sh @@ -21,6 +21,26 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.ordinary" ${CLICKHOUSE_CLIENT} --query="DROP TABLE test.ordinary" +${CLICKHOUSE_CLIENT} --query="SELECT '*** Vertical merge ***'" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.vertical" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.vertical(k UInt32, v UInt32) ENGINE MergeTree ORDER BY k \ + SETTINGS enable_vertical_merge_algorithm=1, \ + vertical_merge_algorithm_min_rows_to_activate=0, \ + vertical_merge_algorithm_min_columns_to_activate=0" + +${CLICKHOUSE_CLIENT} --query="INSERT INTO test.vertical(k, v) VALUES (1, 1)" +${CLICKHOUSE_CLIENT} --query="INSERT INTO test.vertical(k, v) VALUES (2, 2)" + +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.vertical DELETE WHERE k = 1" +wait_for_mutation "vertical" "mutation_3.txt" + +${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE test.vertical PARTITION tuple() FINAL" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.vertical" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.vertical" + + ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.summing" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.summing(k UInt32, v UInt32) ENGINE SummingMergeTree ORDER BY k" From b0512b6d3295b462ee2c54e444522c403f21777e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 23 Aug 2018 18:25:18 +0800 Subject: [PATCH 126/192] Fix misused target_include_directories. 
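
In CMake's signature the optional BEFORE flag must precede the scope keyword:

    target_include_directories(<target> [SYSTEM] [BEFORE]
                               <INTERFACE|PUBLIC|PRIVATE> [items...])

Written as "PRIVATE BEFORE <dir>", BEFORE is consumed as one of the items
(that is, as a relative include path), so the bundled zlib and OpenSSL
headers were not actually prepended ahead of the system ones; the hunks
below correct the argument order. (The signature above is quoted from the
CMake documentation; the diff is the authoritative change.)
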
--- contrib/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b905dbc1ba7..ba09a88f165 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -104,14 +104,14 @@ endif () if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) add_subdirectory (mariadb-connector-c-cmake) - target_include_directories(mysqlclient PRIVATE BEFORE ${ZLIB_INCLUDE_DIR}) - target_include_directories(mysqlclient PRIVATE BEFORE ${OPENSSL_INCLUDE_DIR}) + target_include_directories(mysqlclient BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) + target_include_directories(mysqlclient BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) add_subdirectory (librdkafka-cmake) - target_include_directories(rdkafka PRIVATE BEFORE ${ZLIB_INCLUDE_DIR}) - target_include_directories(rdkafka PRIVATE BEFORE ${OPENSSL_INCLUDE_DIR}) + target_include_directories(rdkafka BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) + target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) endif () if (ENABLE_ODBC AND USE_INTERNAL_ODBC_LIBRARY) From 38347f120d9ab40fe338f4a1e715762d38208b09 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 23 Aug 2018 15:20:54 +0300 Subject: [PATCH 127/192] fixes --- dbms/src/Common/Stopwatch.cpp | 2 +- dbms/src/Common/ThreadProfileEvents.h | 2 +- dbms/src/Interpreters/tests/internal_iotop.cpp | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/dbms/src/Common/Stopwatch.cpp b/dbms/src/Common/Stopwatch.cpp index 711ab16d3af..d792e054d24 100644 --- a/dbms/src/Common/Stopwatch.cpp +++ b/dbms/src/Common/Stopwatch.cpp @@ -5,7 +5,7 @@ StopwatchRUsage::Timestamp StopwatchRUsage::Timestamp::current() { StopwatchRUsage::Timestamp res; - ::rusage rusage; + ::rusage rusage {}; #if !defined(__APPLE__) ::getrusage(RUSAGE_THREAD, &rusage); #endif diff --git a/dbms/src/Common/ThreadProfileEvents.h b/dbms/src/Common/ThreadProfileEvents.h index 523da7d107b..23a414d2d05 100644 --- a/dbms/src/Common/ThreadProfileEvents.h +++ b/dbms/src/Common/ThreadProfileEvents.h @@ -89,7 +89,7 @@ struct RUsageCounters static RUsageCounters current(UInt64 real_time_ = getCurrentTimeNanoseconds()) { - ::rusage rusage; + ::rusage rusage {}; #if !defined(__APPLE__) ::getrusage(RUSAGE_THREAD, &rusage); #endif diff --git a/dbms/src/Interpreters/tests/internal_iotop.cpp b/dbms/src/Interpreters/tests/internal_iotop.cpp index d35cc387605..0ca49f94512 100644 --- a/dbms/src/Interpreters/tests/internal_iotop.cpp +++ b/dbms/src/Interpreters/tests/internal_iotop.cpp @@ -6,11 +6,7 @@ #include #include #include - -#if defined(__linux__) #include -#endif - #include #include #include @@ -51,7 +47,6 @@ using namespace DB; void do_io(size_t id) { -#if defined(__linux__) ::taskstats stat; int tid = TaskStatsInfoGetter::getCurrentTID(); TaskStatsInfoGetter get_info; @@ -101,7 +96,6 @@ void do_io(size_t id) } Poco::File(path_dst).remove(false); -#endif } void test_perf() From 518f1f41a3df15e836f06f734e30e4fe824c9b9f Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 23 Aug 2018 15:28:45 +0300 Subject: [PATCH 128/192] clean --- debian/rules | 6 +++++- utils/travis/normal.sh | 12 ++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/debian/rules b/debian/rules index c9ff7635350..1b1f2e717ef 100755 --- a/debian/rules +++ b/debian/rules @@ -24,6 +24,10 @@ ifndef ENABLE_TESTS CMAKE_FLAGS += -DENABLE_TESTS=0 endif +ifndef MAKE_TARGET + MAKE_TARGET = clickhouse-bundle +endif + #DEB_CLANG ?= $(shell which clang-6.0 || which 
clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8) #DEB_CC ?= gcc-7 @@ -70,7 +74,7 @@ override_dh_auto_configure: override_dh_auto_build: # Fix for ninja. Do not add -O. - cd $(BUILDDIR) && $(MAKE) -j$(THREADS_COUNT) + cd $(BUILDDIR) && $(MAKE) -j$(THREADS_COUNT) $(MAKE_TARGET) #cd $(BUILDDIR) && cmake --build . -- -j$(THREADS_COUNT) # cmake return true on error override_dh_auto_test: diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index 40aeca33424..16482c3f184 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -32,14 +32,14 @@ cmake $CUR_DIR/../.. -DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ + -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS -ninja clickhouse-bundle \ - `# Skip tests:` \ - `# 00281 requires internal compiler` \ - `# 00428 requires sudo (not all vms allow this)` \ - `# 00385 runs infinitly (TODO: fix it)` \ +ninja clickhouse-bundle +# Skip tests: +# 00281 requires internal compiler +# 00428 requires sudo (not all vms allow this) +# 00385 runs infinitely (TODO: fix it) [ ! ${TEST_RUN=1} ] || ( ( cd $CUR_DIR/../.. && env TEST_OPT="--skip long compile 00428 00385 $TEST_OPT" TEST_PORT_RANDOM= TEST_PERF= TEST_SERVER_STARTUP_WAIT=10 bash -x dbms/tests/clickhouse-test-server ) || ${TEST_TRUE=false} ) date From 762ffa1454891aed1b728d966d8771b30e089608 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 23 Aug 2018 18:31:20 +0300 Subject: [PATCH 129/192] CLICKHOUSE-3894: Small improvements in integration tests framework --- dbms/tests/integration/helpers/client.py | 8 +++++++- dbms/tests/integration/helpers/cluster.py | 10 ++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/helpers/client.py b/dbms/tests/integration/helpers/client.py index 3324a9c8ba3..f5853f8ee8c 100644 --- a/dbms/tests/integration/helpers/client.py +++ b/dbms/tests/integration/helpers/client.py @@ -2,13 +2,19 @@ import errno import subprocess as sp from threading import Timer import tempfile +import os class Client: def __init__(self, host, port=9000, command='/usr/bin/clickhouse-client'): self.host = host self.port = port - self.command = [command, '--host', self.host, '--port', str(self.port), '--stacktrace'] + self.command = [command] + + if os.path.basename(command) == 'clickhouse': + self.command.append('client') + + self.command += ['--host', self.host, '--port', str(self.port), '--stacktrace'] def query(self, sql, stdin=None, timeout=None, settings=None, user=None, ignore_error=False): diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 3aeca080aab..3c3f4912c40 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -40,7 +40,7 @@ class ClickHouseCluster: """ def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None, - zookeeper_config_path=None): + zookeeper_config_path=None, custom_dockerd_host=None): self.base_dir = p.dirname(base_path) self.name = name if name is not None else '' @@ -54,7 +54,13 @@
class ClickHouseCluster: self.project_name = re.sub(r'[^a-z0-9]', '', self.project_name.lower()) self.instances_dir = p.join(self.base_dir, '_instances' + ('' if not self.name else '_' + self.name)) - self.base_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name] + custom_dockerd_host = custom_dockerd_host or os.environ.get('CLICKHOUSE_TESTS_DOCKERD_HOST', '') + + self.base_cmd = ['docker-compose'] + if custom_dockerd_host: + self.base_cmd += ['--host', custom_dockerd_host] + + self.base_cmd += ['--project-directory', self.base_dir, '--project-name', self.project_name] self.base_zookeeper_cmd = None self.base_mysql_cmd = [] self.base_kafka_cmd = [] From aae696730417d3e88ee5ff9a04c4db0dbabf7446 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 23 Aug 2018 18:38:25 +0300 Subject: [PATCH 130/192] CLICKHOUSE-3894: Remove default from get --- dbms/tests/integration/helpers/cluster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 3c3f4912c40..873bd5c740e 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -54,7 +54,7 @@ class ClickHouseCluster: self.project_name = re.sub(r'[^a-z0-9]', '', self.project_name.lower()) self.instances_dir = p.join(self.base_dir, '_instances' + ('' if not self.name else '_' + self.name)) - custom_dockerd_host = custom_dockerd_host or os.environ.get('CLICKHOUSE_TESTS_DOCKERD_HOST', '') + custom_dockerd_host = custom_dockerd_host or os.environ.get('CLICKHOUSE_TESTS_DOCKERD_HOST') self.base_cmd = ['docker-compose'] if custom_dockerd_host: From 744b310a0f8e3350c6a7a709eb131ace5fe401f5 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 23 Aug 2018 19:06:13 +0300 Subject: [PATCH 131/192] Debian: Don't build utils --- debian/rules | 2 ++ utils/CMakeLists.txt | 27 ++++++++++++++++----------- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/debian/rules b/debian/rules index 1b1f2e717ef..33967073091 100755 --- a/debian/rules +++ b/debian/rules @@ -28,6 +28,8 @@ ifndef MAKE_TARGET MAKE_TARGET = clickhouse-bundle endif +CMAKE_FLAGS += -DENABLE_UTILS=0 + #DEB_CLANG ?= $(shell which clang-6.0 || which clang-5.0 || which clang-4.0 || which clang || which clang-3.9 || which clang-3.8) #DEB_CC ?= gcc-7 diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 010e38ebfe0..9e39581cbda 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -3,16 +3,21 @@ if (NOT NO_WERROR) set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror") endif () -add_subdirectory (compressor) -add_subdirectory (iotest) -add_subdirectory (corrector_utf8) +# Utils used in package add_subdirectory (config-processor) -add_subdirectory (zookeeper-cli) -add_subdirectory (zookeeper-dump-tree) -add_subdirectory (zookeeper-remove-by-list) -add_subdirectory (zookeeper-create-entry-to-download-part) -add_subdirectory (wikistat-loader) -add_subdirectory (fill-factor) -add_subdirectory (check-marks) -add_subdirectory (test-data-generator) add_subdirectory (report) + +# Not used in package +if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) + add_subdirectory (compressor) + add_subdirectory (iotest) + add_subdirectory (corrector_utf8) + add_subdirectory (zookeeper-cli) + add_subdirectory (zookeeper-dump-tree) + add_subdirectory (zookeeper-remove-by-list) + add_subdirectory (zookeeper-create-entry-to-download-part) + add_subdirectory (wikistat-loader) + add_subdirectory (fill-factor) + add_subdirectory
(check-marks) + add_subdirectory (test-data-generator) +endif () From 08f6305dba7c5e80686f819b7eafedb3bbbb3f2d Mon Sep 17 00:00:00 2001 From: Alexey Zatelepin Date: Thu, 23 Aug 2018 19:33:25 +0300 Subject: [PATCH 132/192] add missing logging message --- .../Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 9bd0da83364..d63633b07c8 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -214,6 +214,7 @@ void ReplicatedMergeTreeBlockOutputStream::commitPart(zkutil::ZooKeeperPtr & zoo if (!block_number_lock) { + LOG_INFO(log, "Block with ID " << block_id << " already exists; ignoring it."); part->is_duplicate = true; last_block_is_duplicate = true; ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); From fe45a62c34b7659315af61ab39c1bcd1244b9026 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 23 Aug 2018 21:39:21 +0300 Subject: [PATCH 133/192] fix --- dbms/src/Common/setThreadName.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Common/setThreadName.cpp b/dbms/src/Common/setThreadName.cpp index c5c4fd54e08..fc228d78ff4 100644 --- a/dbms/src/Common/setThreadName.cpp +++ b/dbms/src/Common/setThreadName.cpp @@ -40,11 +40,11 @@ std::string getThreadName() std::string name(16, '\0'); #if defined(__APPLE__) - if (pthread_getname_np(pthread_self(), name.data(), name.size())); + if (pthread_getname_np(pthread_self(), name.data(), name.size())) throw DB::Exception("Cannot get thread name with pthread_getname_np()", DB::ErrorCodes::PTHREAD_ERROR); #elif defined(__FreeBSD__) // TODO: make test.
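An aside on the one-character fix above: a stray `;` directly after `if (...)` is an empty statement, so the statement that follows it runs unconditionally; here, getThreadName() would throw even when pthread_getname_np succeeded. A minimal stand-alone C++ illustration of the failure mode (illustrative only, not part of the patch):

// Illustrative only; not part of the patch. Shows why "if (cond);" is wrong.
#include <iostream>

int main()
{
    int result = 0;                  // pretend the pthread call returned success
    if (result != 0);                // stray semicolon: the condition is checked, then discarded
        std::cout << "would throw even on success\n";   // runs unconditionally
    if (result != 0)                 // the fixed form actually guards the next statement
        std::cout << "correctly skipped\n";
    return 0;
}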
freebsd will have this function soon https://freshbsd.org/commit/freebsd/r337983 -// if (pthread_get_name_np(pthread_self(), name.data(), name.size())); +// if (pthread_get_name_np(pthread_self(), name.data(), name.size())) // throw DB::Exception("Cannot get thread name with pthread_get_name_np()", DB::ErrorCodes::PTHREAD_ERROR); #else if (0 != prctl(PR_GET_NAME, name.data(), 0, 0, 0)) From d5ccd26348840f0bd95975ea5f98f7445109d2ae Mon Sep 17 00:00:00 2001 From: Max Akhmedov Date: Thu, 23 Aug 2018 21:44:13 +0300 Subject: [PATCH 134/192] More verbose access denied error message --- dbms/src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 37ba4b3f8c8..ec2373fe284 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -632,7 +632,7 @@ void Context::checkDatabaseAccessRightsImpl(const std::string & database_name) c return; } if (!shared->security_manager->hasAccessToDatabase(client_info.current_user, database_name)) - throw Exception("Access denied to database " + database_name, ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception("Access denied to database " + database_name + " for user " + client_info.current_user, ErrorCodes::DATABASE_ACCESS_DENIED); } void Context::addDependency(const DatabaseAndTableName & from, const DatabaseAndTableName & where) From ea80ca898422a52bd611f9206ba635c6b0ec80e8 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 23 Aug 2018 21:44:32 +0300 Subject: [PATCH 135/192] fix --- cmake/find_zlib.cmake | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cmake/find_zlib.cmake b/cmake/find_zlib.cmake index f1f7c128d7b..0e198c9bb0f 100644 --- a/cmake/find_zlib.cmake +++ b/cmake/find_zlib.cmake @@ -1,6 +1,4 @@ -if (NOT OS_FREEBSD AND NOT APPLE) - option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library instead of bundled" ${NOT_UNBUNDLED}) -endif () +option (USE_INTERNAL_ZLIB_LIBRARY "Set to FALSE to use system zlib library instead of bundled" ${NOT_UNBUNDLED}) if (NOT USE_INTERNAL_ZLIB_LIBRARY) find_package (ZLIB) From b76b5af04495c7337be741272598f4c69e309fa9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 02:59:16 +0300 Subject: [PATCH 136/192] Fixed bad code in clickhouse-local #2923 --- dbms/programs/local/LocalServer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dbms/programs/local/LocalServer.cpp b/dbms/programs/local/LocalServer.cpp index 775916bc2b1..4c6f6ecbd2c 100644 --- a/dbms/programs/local/LocalServer.cpp +++ b/dbms/programs/local/LocalServer.cpp @@ -282,7 +282,11 @@ void LocalServer::processQueries() WriteBufferFromFileDescriptor write_buf(STDOUT_FILENO); if (echo_queries) - std::cout << query << '\n'; + { + writeString(query, write_buf); + writeChar('\n', write_buf); + write_buf.next(); + } try { From 1437e5cbe394c9ba5e5d9fa1bfa979efc9eb6673 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 03:07:25 +0300 Subject: [PATCH 137/192] Fix style [#CLICKHOUSE-2] --- dbms/programs/odbc-bridge/ColumnInfoHandler.cpp | 3 ++- dbms/programs/odbc-bridge/MainHandler.cpp | 3 ++- dbms/programs/server/TCPHandler.cpp | 3 ++- dbms/src/Columns/ColumnWithDictionary.h | 6 ++++-- dbms/src/Functions/IFunction.cpp | 4 ++-- dbms/src/IO/tests/zlib_ng_bug.cpp | 4 ++-- dbms/src/Storages/StorageODBC.cpp | 3 ++- dbms/src/Storages/StorageURL.cpp | 3 ++- 8 files changed, 18 insertions(+), 11 deletions(-) diff --git
a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp index 3b4c7f42cea..ead72457fd4 100644 --- a/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/dbms/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -51,7 +51,8 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques Poco::Net::HTMLForm params(request, request.stream()); LOG_TRACE(log, "Request URI: " + request.getURI()); - auto process_error = [&response, this](const std::string & message) { + auto process_error = [&response, this](const std::string & message) + { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) response.send() << message << std::endl; diff --git a/dbms/programs/odbc-bridge/MainHandler.cpp b/dbms/programs/odbc-bridge/MainHandler.cpp index e7764907dbe..04c957d0f21 100644 --- a/dbms/programs/odbc-bridge/MainHandler.cpp +++ b/dbms/programs/odbc-bridge/MainHandler.cpp @@ -49,7 +49,8 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne Poco::Net::HTMLForm params(request, request.stream()); LOG_TRACE(log, "Request URI: " + request.getURI()); - auto process_error = [&response, this](const std::string & message) { + auto process_error = [&response, this](const std::string & message) + { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) response.send() << message << std::endl; diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index c2f8d34b5d2..d129a94c78c 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -158,7 +158,8 @@ void TCPHandler::runImpl() CurrentThread::attachInternalTextLogsQueue(state.logs_queue); } - query_context.setExternalTablesInitializer([&global_settings, this] (Context & context) { + query_context.setExternalTablesInitializer([&global_settings, this] (Context & context) + { if (&context != &query_context) throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR); diff --git a/dbms/src/Columns/ColumnWithDictionary.h b/dbms/src/Columns/ColumnWithDictionary.h index 5d68dca5796..86bd7245980 100644 --- a/dbms/src/Columns/ColumnWithDictionary.h +++ b/dbms/src/Columns/ColumnWithDictionary.h @@ -112,8 +112,10 @@ public: std::vector scatter(ColumnIndex num_columns, const Selector & selector) const override; - void gather(ColumnGathererStream & gatherer_stream) override ; - void getExtremes(Field & min, Field & max) const override { + void gather(ColumnGathererStream & gatherer_stream) override; + + void getExtremes(Field & min, Field & max) const override + { return getDictionary().index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize } diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index f0735b189a2..25e448ba4bb 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -234,8 +234,8 @@ void PreparedFunctionImpl::executeWithoutColumnsWithDictionary(Block & block, co executeImpl(block, args, result, input_rows_count); } -static ColumnPtr replaceColumnsWithDictionaryByNestedAndGetDictionaryIndexes(Block & block, const ColumnNumbers & args, - bool can_be_executed_on_default_arguments) +static ColumnPtr replaceColumnsWithDictionaryByNestedAndGetDictionaryIndexes( + Block & block, const ColumnNumbers & args, bool can_be_executed_on_default_arguments) { size_t num_rows = 0; ColumnPtr indexes; diff --git 
a/dbms/src/IO/tests/zlib_ng_bug.cpp b/dbms/src/IO/tests/zlib_ng_bug.cpp index d5ce5d00b12..8b94b4e49d2 100644 --- a/dbms/src/IO/tests/zlib_ng_bug.cpp +++ b/dbms/src/IO/tests/zlib_ng_bug.cpp @@ -8,11 +8,11 @@ abcdefghijklmn!@Aab#AAabcdefghijklmn$% xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx * There are two lines. First line make sense. Second line contains padding to make file size large enough. - * Compile with + * Compile with * cmake -D SANITIZE=address * and run: -./zlib_ng_bug data2.bin +./zlib_ng_bug data2.bin ================================================================= ==204952==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6310000147ff at pc 0x000000596d7a bp 0x7ffd139edd50 sp 0x7ffd139edd48 READ of size 1 at 0x6310000147ff thread T0 diff --git a/dbms/src/Storages/StorageODBC.cpp b/dbms/src/Storages/StorageODBC.cpp index b2474896b62..94511aafcff 100644 --- a/dbms/src/Storages/StorageODBC.cpp +++ b/dbms/src/Storages/StorageODBC.cpp @@ -89,7 +89,8 @@ BlockInputStreams StorageODBC::read(const Names & column_names, void registerStorageODBC(StorageFactory & factory) { - factory.registerStorage("ODBC", [](const StorageFactory::Arguments & args) { + factory.registerStorage("ODBC", [](const StorageFactory::Arguments & args) + { ASTs & engine_args = args.engine_args; if (engine_args.size() != 3) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 10816537fb9..901522b482a 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -188,7 +188,8 @@ BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const Sett void registerStorageURL(StorageFactory & factory) { - factory.registerStorage("URL", [](const StorageFactory::Arguments & args) { + factory.registerStorage("URL", [](const StorageFactory::Arguments & args) + { ASTs & engine_args = args.engine_args; if (!(engine_args.size() == 1 || engine_args.size() == 2)) From 10ef8caa3b44005aa37cf5f8e5efae477cad5f33 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 03:09:32 +0300 Subject: [PATCH 138/192] Implemented TODO [#CLICKHOUSE-2] --- debian/clickhouse-server.postinst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index 57882006d78..eb3781225a6 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -82,12 +82,6 @@ Please fix this and reinstall this package." 
>&2 chmod 775 ${CLICKHOUSE_LOGDIR} fi - - if [ -d ${CLICKHOUSE_LOGDIR} ]; then - # only for compatibility for old metrika user, remove string after 2017-06-01 - su -s /bin/sh ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}" || chown -R root:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}; chmod -R ug+rw ${CLICKHOUSE_LOGDIR} - fi - # Clean old dynamic compilation results if [ -d "${CLICKHOUSE_DATADIR}/build" ]; then rm -f ${CLICKHOUSE_DATADIR}/build/*.cpp ${CLICKHOUSE_DATADIR}/build/*.so ||: From b0c36ab99047178bb0f60ef982f17696e5ed66c6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 03:21:03 +0300 Subject: [PATCH 139/192] Moved "setcap" from init script to postinst #2482 --- debian/clickhouse-server.init | 2 -- debian/clickhouse-server.postinst | 9 ++++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index 780b51ea46f..e92b3e281df 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -164,8 +164,6 @@ start() else mkdir -p $CLICKHOUSE_PIDDIR chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR - # Set net_admin capabilities to support ClickHouse better introspection - setcap cap_net_admin=+ep "$BINDIR/$GENERIC_PROGRAM" initdb if ! is_running; then # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index eb3781225a6..269e7c2da0c 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -6,6 +6,9 @@ CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP=${CLICKHOUSE_USER}} CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR=/etc/clickhouse-server} CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse} CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server} +CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR=/usr/bin} +CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM=clickhouse} + OS=${OS=`lsb_release -is 2>/dev/null || uname -s || true`} test -f /etc/default/clickhouse && . /etc/default/clickhouse @@ -82,9 +85,13 @@ Please fix this and reinstall this package." >&2 chmod 775 ${CLICKHOUSE_LOGDIR} fi + # Set net_admin capabilities to support introspection of "taskstats" performance metrics from the kernel. + command -v setcap >/dev/null \ + && setcap cap_net_admin=+ep "${CLICKHOUSE_BINDIR}/${CLICKHOUSE_GENERIC_PROGRAM}" \ + || echo "Cannot set 'net_admin' capability for clickhouse binary. This is optional. Taskstats accounting will be disabled. To enable taskstats accounting you may add the required capability later manually." 
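For context on the setcap step above: reading per-thread taskstats counters over netlink requires the CAP_NET_ADMIN capability, which is why the package grants it to the binary and treats failure as optional. A hypothetical stand-alone probe (assuming libcap is installed; compile with -lcap; not something shipped by the package) to check whether the process ended up with the capability:

// Hypothetical probe, not from the patch: is CAP_NET_ADMIN effective for this process?
#include <sys/capability.h>
#include <cstdio>

int main()
{
    cap_flag_value_t value = CAP_CLEAR;
    cap_t caps = cap_get_proc();                 // snapshot of this process's capability sets
    if (!caps)
        return 1;
    if (0 == cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &value))
        std::printf("CAP_NET_ADMIN effective: %s\n", value == CAP_SET ? "yes" : "no");
    cap_free(caps);
    return 0;
}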
+ # Clean old dynamic compilation results if [ -d "${CLICKHOUSE_DATADIR}/build" ]; then rm -f ${CLICKHOUSE_DATADIR}/build/*.cpp ${CLICKHOUSE_DATADIR}/build/*.so ||: fi - fi From 38d5d8eaad6fc1a42c20ebe4889f0a3de131d585 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 03:53:09 +0300 Subject: [PATCH 140/192] Reverted #2325 #2261 --- .../Common/getNumberOfPhysicalCPUCores.cpp | 20 ------------------- 1 file changed, 20 deletions(-) diff --git a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp index b1f091bd5e6..0a686b9c772 100644 --- a/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp +++ b/dbms/src/Common/getNumberOfPhysicalCPUCores.cpp @@ -1,6 +1,5 @@ #include #include -#include #if defined(__x86_64__) @@ -14,25 +13,6 @@ unsigned getNumberOfPhysicalCPUCores() { -#if defined(__linux__) - /// On Linux we try to look at Cgroups limit if it is available. - std::ifstream cgroup_read_in("/sys/fs/cgroup/cpu/cpu.cfs_quota_us"); - if (cgroup_read_in.is_open()) - { - std::string allocated_cpus_share_str{ std::istreambuf_iterator(cgroup_read_in), std::istreambuf_iterator() }; - int allocated_cpus_share_int = std::stoi(allocated_cpus_share_str); - - cgroup_read_in.close(); - - // If a valid value is present - if (allocated_cpus_share_int > 0) - { - unsigned allocated_cpus = (allocated_cpus_share_int + 999) / 1000; - return allocated_cpus; - } - } -#endif - #if defined(__x86_64__) cpu_raw_data_t raw_data; if (0 != cpuid_get_raw_data(&raw_data)) From 32cc611f5d5d7e585930d3d290e06edbaa2bc805 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Wed, 22 Aug 2018 14:42:37 +0800 Subject: [PATCH 141/192] ISSUES-863 fix predicate optimizer not work for asterisk --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 164 +---------- dbms/src/Interpreters/ExpressionAnalyzer.h | 14 +- .../PredicateExpressionsOptimizer.cpp | 273 +++++++++++++----- .../PredicateExpressionsOptimizer.h | 31 +- dbms/src/Interpreters/Settings.h | 2 +- dbms/src/Interpreters/evaluateQualified.cpp | 160 ++++++++++ dbms/src/Interpreters/evaluateQualified.h | 34 +++ .../00597_push_down_predicate.reference | 20 +- .../0_stateless/00597_push_down_predicate.sql | 65 ++--- 9 files changed, 465 insertions(+), 298 deletions(-) create mode 100644 dbms/src/Interpreters/evaluateQualified.cpp create mode 100644 dbms/src/Interpreters/evaluateQualified.h diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 889524a3fa5..aeb44ee4afb 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -66,6 +66,7 @@ #include #include #include +#include namespace DB @@ -164,35 +165,6 @@ void removeDuplicateColumns(NamesAndTypesList & columns) } - -String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const -{ - return (!alias.empty() ? alias : (database + '.' 
+ table)) + '.'; -} - - -void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const -{ - if (auto identifier = typeid_cast(ast.get())) - { - String prefix = getQualifiedNamePrefix(); - identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end()); - - Names qualifiers; - if (!alias.empty()) - qualifiers.push_back(alias); - else - { - qualifiers.push_back(database); - qualifiers.push_back(table); - } - - for (const auto & qualifier : qualifiers) - identifier->children.emplace_back(std::make_shared(qualifier)); - } -} - - ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & ast_, const Context & context_, @@ -274,7 +246,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( getArrayJoinedColumns(); /// Push the predicate expression down to the subqueries. - rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings).optimize(); + rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize(); /// Delete the unnecessary from `source_columns` list. Create `unknown_required_source_columns`. Form `columns_added_by_join`. collectUsedColumns(); @@ -293,46 +265,6 @@ ExpressionAnalyzer::ExpressionAnalyzer( analyzeAggregation(); } -static DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, - const Context & context) -{ - DatabaseAndTableWithAlias database_and_table_with_alias; - - if (table_expression.database_and_table_name) - { - const auto & identifier = static_cast(*table_expression.database_and_table_name); - - database_and_table_with_alias.alias = identifier.tryGetAlias(); - - if (table_expression.database_and_table_name->children.empty()) - { - database_and_table_with_alias.database = context.getCurrentDatabase(); - database_and_table_with_alias.table = identifier.name; - } - else - { - if (table_expression.database_and_table_name->children.size() != 2) - throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - - database_and_table_with_alias.database = static_cast(*identifier.children[0]).name; - database_and_table_with_alias.table = static_cast(*identifier.children[1]).name; - } - } - else if (table_expression.table_function) - { - database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias(); - } - else if (table_expression.subquery) - { - database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias(); - } - else - throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); - - return database_and_table_with_alias; -} - - void ExpressionAnalyzer::translateQualifiedNames() { if (!select_query || !select_query->tables || select_query->tables->children.empty()) @@ -357,80 +289,6 @@ void ExpressionAnalyzer::translateQualifiedNames() translateQualifiedNamesImpl(ast, tables); } - -/// Get the number of components of identifier which are correspond to 'alias.', 'table.' or 'databas.table.' from names. 
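The helpers deleted here are moved, not dropped; they reappear in evaluateQualified.cpp further down. As a rough illustration of what stripping qualifiers means, here is a toy model that treats a compound identifier as a list of name components (illustrative only, not the real AST types):

// Toy model of stripIdentifier, not the real AST: an identifier as its name parts.
#include <iostream>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> parts {"db", "table", "column"};   // "db.table.column"
    size_t num_qualifiers_to_strip = 2;        // "db.table." matched the current table

    parts.erase(parts.begin(), parts.begin() + num_qualifiers_to_strip);

    std::string name;
    for (const auto & part : parts)
        name += (name.empty() ? "" : ".") + part;
    std::cout << name << '\n';                 // prints: column
}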
-static size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, - const DatabaseAndTableWithAlias & names) -{ - size_t num_qualifiers_to_strip = 0; - - auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; - - /// It is compound identifier - if (!identifier.children.empty()) - { - size_t num_components = identifier.children.size(); - - /// database.table.column - if (num_components >= 3 - && !names.database.empty() - && get_identifier_name(identifier.children[0]) == names.database - && get_identifier_name(identifier.children[1]) == names.table) - { - num_qualifiers_to_strip = 2; - } - - /// table.column or alias.column. If num_components > 2, it is like table.nested.column. - if (num_components >= 2 - && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) - || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) - { - num_qualifiers_to_strip = 1; - } - } - - return num_qualifiers_to_strip; -} - - -/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. -/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'. -static void stripIdentifier(ASTPtr & ast, size_t num_qualifiers_to_strip) -{ - ASTIdentifier * identifier = typeid_cast(ast.get()); - - if (!identifier) - throw Exception("ASTIdentifier expected for stripIdentifier", ErrorCodes::LOGICAL_ERROR); - - if (num_qualifiers_to_strip) - { - size_t num_components = identifier->children.size(); - - /// plain column - if (num_components - num_qualifiers_to_strip == 1) - { - String node_alias = identifier->tryGetAlias(); - ast = identifier->children.back(); - if (!node_alias.empty()) - ast->setAlias(node_alias); - } - else - /// nested column - { - identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip); - String new_name; - for (const auto & child : identifier->children) - { - if (!new_name.empty()) - new_name += '.'; - new_name += static_cast(*child.get()).name; - } - identifier->name = new_name; - } - } -} - - void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::vector & tables) { if (auto * identifier = typeid_cast(ast.get())) @@ -509,7 +367,6 @@ void ExpressionAnalyzer::translateQualifiedNamesImpl(ASTPtr & ast, const std::ve } } - void ExpressionAnalyzer::optimizeIfWithConstantCondition() { optimizeIfWithConstantConditionImpl(ast, aliases); @@ -765,23 +622,6 @@ void ExpressionAnalyzer::findExternalTables(ASTPtr & ast) external_tables[node->name] = external_storage; } - -static std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier) -{ - std::pair res; - res.second = identifier.name; - if (!identifier.children.empty()) - { - if (identifier.children.size() != 2) - throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR); - - res.first = typeid_cast(*identifier.children[0]).name; - res.second = typeid_cast(*identifier.children[1]).name; - } - return res; -} - - static std::shared_ptr interpretSubquery( const ASTPtr & subquery_or_table_name, const Context & context, size_t subquery_depth, const Names & required_source_columns) { diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 28d4ca7c10f..ebb25234424 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -4,6 +4,7 @@ #include #include 
#include +#include #include #include #include @@ -91,19 +92,6 @@ struct ScopeStack const Block & getSampleBlock() const; }; -struct DatabaseAndTableWithAlias -{ - String database; - String table; - String alias; - - /// "alias." or "database.table." if alias is empty - String getQualifiedNamePrefix() const; - - /// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to it's name. - void makeQualifiedName(const ASTPtr & ast) const; -}; - /** Transforms an expression from a syntax tree into a sequence of actions to execute it. * * NOTE: if `ast` is a SELECT query from a table, the structure of this table should not change during the lifetime of ExpressionAnalyzer. diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 7f6470f7218..d3e5fa05cee 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -2,11 +2,11 @@ #include #include #include -#include #include -#include -#include #include +#include +#include +#include namespace DB { @@ -14,18 +14,18 @@ namespace DB static constexpr auto and_function_name = "and"; PredicateExpressionsOptimizer::PredicateExpressionsOptimizer( - ASTSelectQuery * ast_select_, const Settings & settings_) - : ast_select(ast_select_), settings(settings_) + ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_) + : ast_select(ast_select_), settings(settings_), context(context_) { } bool PredicateExpressionsOptimizer::optimize() { - if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables) + if (!settings.enable_optimize_predicate_expression || !ast_select || !ast_select->tables || ast_select->tables->children.empty()) return false; SubqueriesProjectionColumns all_subquery_projection_columns; - getAllSubqueryProjectionColumns(ast_select->tables.get(), all_subquery_projection_columns); + getAllSubqueryProjectionColumns(all_subquery_projection_columns); bool is_rewrite_subqueries = false; if (!all_subquery_projection_columns.empty()) @@ -42,11 +42,16 @@ bool PredicateExpressionsOptimizer::optimizeImpl( /// split predicate with `and` PredicateExpressions outer_predicate_expressions = splitConjunctionPredicate(outer_expression); + std::vector tables_expression = getSelectTablesExpression(ast_select); + std::vector database_and_table_with_aliases; + for (const auto & table_expression : tables_expression) + database_and_table_with_aliases.emplace_back(getTableNameWithAliasFromTableExpression(*table_expression, context)); + bool is_rewrite_subquery = false; for (const auto & outer_predicate : outer_predicate_expressions) { - ASTs outer_predicate_dependent; - getExpressionDependentColumns(outer_predicate, outer_predicate_dependent); + IdentifiersWithQualifiedNameSet outer_predicate_dependencies; + getDependenciesAndQualifiedOfExpression(outer_predicate, outer_predicate_dependencies, database_and_table_with_aliases); /// TODO: remove origin expression for (const auto & subquery_projection_columns : subqueries_projection_columns) @@ -55,10 +60,10 @@ bool PredicateExpressionsOptimizer::optimizeImpl( const ProjectionsWithAliases projection_columns = subquery_projection_columns.second; OptimizeKind optimize_kind = OptimizeKind::NONE; - if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependent, is_prewhere, optimize_kind)) + if (!cannotPushDownOuterPredicate(projection_columns, subquery, outer_predicate_dependencies, is_prewhere, 
optimize_kind)) { ASTPtr inner_predicate; - cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, outer_predicate_dependent, inner_predicate); + cloneOuterPredicateForInnerPredicate(outer_predicate, projection_columns, database_and_table_with_aliases, inner_predicate); switch(optimize_kind) { @@ -109,34 +114,57 @@ PredicateExpressions PredicateExpressionsOptimizer::splitConjunctionPredicate(AS return predicate_expressions; } -void PredicateExpressionsOptimizer::getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns) +void PredicateExpressionsOptimizer::getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, + IdentifiersWithQualifiedNameSet & dependencies_and_qualified, + std::vector & tables_with_aliases) { - if (!typeid_cast(expression.get())) + if (const auto identifier = typeid_cast(expression.get())) + { + if (!identifier->children.empty()) + dependencies_and_qualified.emplace_back(std::pair(identifier, expression->getAliasOrColumnName())); + else + { + size_t best_table_pos = 0; + size_t max_num_qualifiers_to_strip = 0; + + /// translate qualifiers for dependent columns + for (size_t table_pos = 0; table_pos < tables_with_aliases.size(); ++table_pos) + { + const auto & table = tables_with_aliases[table_pos]; + auto num_qualifiers_to_strip = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, table); + + if (num_qualifiers_to_strip > max_num_qualifiers_to_strip) + { + max_num_qualifiers_to_strip = num_qualifiers_to_strip; + best_table_pos = table_pos; + } + } + + String qualified_name = tables_with_aliases[best_table_pos].getQualifiedNamePrefix() + expression->getAliasOrColumnName(); + dependencies_and_qualified.emplace_back(std::pair(identifier, qualified_name)); + } + } + else { for (const auto & child : expression->children) - getExpressionDependentColumns(child, expression_dependent_columns); - - return; + getDependenciesAndQualifiedOfExpression(child, dependencies_and_qualified, tables_with_aliases); } - - expression_dependent_columns.emplace_back(expression); } bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate( const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, - ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind) + IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind) { if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_offset || subquery->with_expression_list) return true; - for (auto & dependent_column : expression_dependent_columns) + for (auto & predicate_dependency : outer_predicate_dependencies) { bool is_found = false; - String dependent_column_name = dependent_column->getAliasOrColumnName(); for (auto projection_column : subquery_projection_columns) { - if (projection_column.second == dependent_column_name) + if (projection_column.second == predicate_dependency.second) { is_found = true; optimize_kind = isAggregateFunction(projection_column.first) ? 
OptimizeKind::PUSH_TO_HAVING : optimize_kind; @@ -168,39 +196,21 @@ bool PredicateExpressionsOptimizer::isAggregateFunction(ASTPtr & node) return false; } -void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns) -{ - if (auto ast_subquery = typeid_cast(node)) - { - ASTs output_projection; - IAST * subquery = ast_subquery->children.at(0).get(); - getSubqueryProjectionColumns(subquery, all_subquery_projection_columns, output_projection); - return; - } - - for (auto & child : node->children) - getAllSubqueryProjectionColumns(child.get(), all_subquery_projection_columns); -} - void PredicateExpressionsOptimizer::cloneOuterPredicateForInnerPredicate( - const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns, - ASTPtr & inner_predicate) + const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, + std::vector & tables, ASTPtr & inner_predicate) { inner_predicate = outer_predicate->clone(); - ASTs new_expression_require_columns; - new_expression_require_columns.reserve(predicate_dependent_columns.size()); - getExpressionDependentColumns(inner_predicate, new_expression_require_columns); + IdentifiersWithQualifiedNameSet new_expression_requires; + getDependenciesAndQualifiedOfExpression(inner_predicate, new_expression_requires, tables); - for (auto & expression : new_expression_require_columns) + for (auto & require : new_expression_requires) { - if (auto identifier = typeid_cast(expression.get())) + for (auto projection : projection_columns) { - for (auto projection : projection_columns) - { - if (identifier->name == projection.second) - identifier->name = projection.first->getAliasOrColumnName(); - } + if (require.second == projection.second) + require.first->name = projection.first->getAliasOrColumnName(); } } } @@ -221,32 +231,155 @@ bool PredicateExpressionsOptimizer::optimizeExpression(const ASTPtr & outer_expr return true; } -void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections) +void PredicateExpressionsOptimizer::getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns) { - if (auto * with_union_subquery = typeid_cast(subquery)) - for (auto & select : with_union_subquery->list_of_selects->children) - getSubqueryProjectionColumns(select.get(), all_subquery_projection_columns, output_projections); + const auto tables_expression = getSelectTablesExpression(ast_select); - - if (auto * without_union_subquery = typeid_cast(subquery)) + for (const auto & table_expression : tables_expression) { - const auto expression_list = without_union_subquery->select_expression_list->children; - - /// use first projection as the output projection - if (output_projections.empty()) - output_projections = expression_list; - - if (output_projections.size() != expression_list.size()) - throw Exception("Number of columns doesn't match", ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH); - - ProjectionsWithAliases subquery_projections; - subquery_projections.reserve(expression_list.size()); - - for (size_t idx = 0; idx < expression_list.size(); idx++) - subquery_projections.emplace_back(std::pair(expression_list.at(idx), output_projections.at(idx)->getAliasOrColumnName())); - - all_subquery_projection_columns.insert(std::pair(subquery, subquery_projections)); + if (table_expression->subquery) + { + 
/// Use qualifiers to translate the columns of subqueries + const auto database_and_table_with_alias = getTableNameWithAliasFromTableExpression(*table_expression, context); + String qualified_name_prefix = database_and_table_with_alias.getQualifiedNamePrefix(); + getSubqueryProjectionColumns(all_subquery_projection_columns, qualified_name_prefix, + static_cast(table_expression->subquery.get())->children[0]); + } } } +void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns, + String & qualified_name_prefix, const ASTPtr & subquery) +{ + ASTs select_with_union_projections; + auto select_with_union_query = static_cast(subquery.get()); + + for (auto & select_without_union_query : select_with_union_query->list_of_selects->children) + { + ProjectionsWithAliases subquery_projections; + auto select_projection_columns = getSelectQueryProjectionColumns(select_without_union_query); + + if (select_with_union_projections.empty()) + select_with_union_projections = select_projection_columns; + + for (size_t i = 0; i < select_projection_columns.size(); i++) + subquery_projections.emplace_back(std::pair(select_projection_columns[i], + qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName())); + + all_subquery_projection_columns.insert(std::pair(select_without_union_query.get(), subquery_projections)); + } +} + +ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast) +{ + ASTs projection_columns; + auto select_query = static_cast(ast.get()); + + for (const auto & projection_column : select_query->select_expression_list->children) + { + if (typeid_cast(projection_column.get()) || typeid_cast(projection_column.get())) + { + ASTs evaluated_columns = evaluateAsterisk(select_query, projection_column); + + for (const auto & column : evaluated_columns) + projection_columns.emplace_back(column); + + continue; + } + + projection_columns.emplace_back(projection_column); + } + return projection_columns; +} + +ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery *select_query, const ASTPtr &asterisk) +{ + if (!select_query->tables || select_query->tables->children.empty()) + throw Exception("Logical error: The asterisk cannot be replaced, because there is no table.", ErrorCodes::LOGICAL_ERROR); + + std::vector tables_expression = getSelectTablesExpression(select_query); + + if (const auto qualified_asterisk = typeid_cast(asterisk.get())) + { + if (qualified_asterisk->children.size() != 1) + throw Exception("Logical error: qualified asterisk must have exactly one child", ErrorCodes::LOGICAL_ERROR); + + ASTIdentifier * ident = typeid_cast(qualified_asterisk->children[0].get()); + if (!ident) + throw Exception("Logical error: qualified asterisk must have identifier as its child", ErrorCodes::LOGICAL_ERROR); + + size_t num_components = ident->children.size(); + if (num_components > 2) + throw Exception("Qualified asterisk cannot have more than two qualifiers", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + + for (auto it = tables_expression.begin(); it != tables_expression.end(); ++it) + { + const ASTTableExpression * table_expression = *it; + const auto database_and_table_with_alias = getTableNameWithAliasFromTableExpression(*table_expression, context); + /// database.table.* + if (num_components == 2 && !database_and_table_with_alias.database.empty() + && static_cast(*ident->children[0]).name == database_and_table_with_alias.database + && static_cast(*ident->children[1]).name == 
database_and_table_with_alias.table) + continue; + /// table.* or alias.* + else if (num_components == 0 + && ((!database_and_table_with_alias.table.empty() && ident->name == database_and_table_with_alias.table) + || (!database_and_table_with_alias.alias.empty() && ident->name == database_and_table_with_alias.alias))) + continue; + else + /// It's not a required table + tables_expression.erase(it); + } + } + + ASTs projection_columns; + for (auto & table_expression : tables_expression) + { + if (table_expression->subquery) + { + const auto subquery = static_cast(table_expression->subquery.get()); + const auto select_with_union_query = static_cast(subquery->children[0].get()); + const auto subquery_projections = getSelectQueryProjectionColumns(select_with_union_query->list_of_selects->children[0]); + projection_columns.insert(projection_columns.end(), subquery_projections.begin(), subquery_projections.end()); + } + else + { + StoragePtr storage; + + if (table_expression->table_function) + storage = const_cast(context).executeTableFunction(table_expression->table_function); + else if (table_expression->database_and_table_name) + { + const auto database_and_table_ast = static_cast(table_expression->database_and_table_name.get()); + const auto database_and_table_name = getDatabaseAndTableNameFromIdentifier(*database_and_table_ast); + storage = context.tryGetTable(database_and_table_name.first, database_and_table_name.second); + } + + const auto block = storage->getSampleBlock(); + for (size_t idx = 0; idx < block.columns(); idx++) + projection_columns.emplace_back(std::make_shared(block.getByPosition(idx).name)); + } + } + return projection_columns; +} + +std::vector PredicateExpressionsOptimizer::getSelectTablesExpression(ASTSelectQuery * select_query) +{ + if (!select_query->tables) + return {}; + + std::vector tables_expression; + const ASTTablesInSelectQuery & tables_in_select_query = static_cast(*select_query->tables); + + for (const auto & child : tables_in_select_query.children) + { + ASTTablesInSelectQueryElement * tables_element = static_cast(child.get()); + + if (tables_element->table_expression) + tables_expression.emplace_back(static_cast(tables_element->table_expression.get())); + } + + return tables_expression; +} + } diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h index 723fe0b118c..f3d00a6fce9 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.h +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.h @@ -7,6 +7,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -21,6 +24,8 @@ using PredicateExpressions = std::vector; using ProjectionWithAlias = std::pair; using ProjectionsWithAliases = std::vector; using SubqueriesProjectionColumns = std::map; +using IdentifierWithQualifiedName = std::pair; +using IdentifiersWithQualifiedNameSet = std::vector; /** This class provides functions for Push-Down predicate expressions @@ -37,13 +42,14 @@ using SubqueriesProjectionColumns = std::map; class PredicateExpressionsOptimizer { public: - PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_); + PredicateExpressionsOptimizer(ASTSelectQuery * ast_select_, const Settings & settings_, const Context & context_); bool optimize(); private: ASTSelectQuery * ast_select; const Settings & settings; + const Context & context; enum OptimizeKind { @@ -57,24 +63,29 @@ private: PredicateExpressions splitConjunctionPredicate(ASTPtr & predicate_expression); 
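splitConjunctionPredicate above is the entry point that flattens a WHERE expression made of nested and() calls into its atomic conjuncts, each of which is then considered for push-down separately. A self-contained sketch of that recursion over a stand-in node type (the real implementation walks ASTFunction nodes; this is illustrative only):

// Sketch of conjunction splitting on a toy node type; not the real AST.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Node
{
    std::string name;                          // "and" for conjunctions, else a leaf predicate
    std::vector<std::shared_ptr<Node>> children;
};

using NodePtr = std::shared_ptr<Node>;

// Recursively collect the atomic conjuncts of an and()-tree.
static void splitConjunction(const NodePtr & node, std::vector<NodePtr> & out)
{
    if (node->name == "and")
    {
        for (const auto & child : node->children)
            splitConjunction(child, out);
    }
    else
        out.push_back(node);
}

int main()
{
    auto leaf = [](const char * s) { return std::make_shared<Node>(Node{s, {}}); };
    auto tree = std::make_shared<Node>(Node{"and",
        {leaf("id = 1"), std::make_shared<Node>(Node{"and", {leaf("date = '2000-01-01'"), leaf("value > 0")}})}});

    std::vector<NodePtr> conjuncts;
    splitConjunction(tree, conjuncts);
    for (const auto & c : conjuncts)
        std::cout << c->name << '\n';          // id = 1, date = '2000-01-01', value > 0
}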
- void getExpressionDependentColumns(const ASTPtr & expression, ASTs & expression_dependent_columns); + void getDependenciesAndQualifiedOfExpression(const ASTPtr & expression, IdentifiersWithQualifiedNameSet & dependencies_and_qualified, + std::vector & tables_with_aliases); bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery); bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, bool is_prewhere); - bool cannotPushDownOuterPredicate( - const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, - ASTs & expression_dependent_columns, bool & is_prewhere, OptimizeKind & optimize_kind); + bool cannotPushDownOuterPredicate(const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, + IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind); - void cloneOuterPredicateForInnerPredicate( - const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, ASTs & predicate_dependent_columns, - ASTPtr & inner_predicate); + void cloneOuterPredicateForInnerPredicate(const ASTPtr & outer_predicate, const ProjectionsWithAliases & projection_columns, + std::vector & tables, ASTPtr & inner_predicate); + void getAllSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns); - void getAllSubqueryProjectionColumns(IAST * node, SubqueriesProjectionColumns & all_subquery_projection_columns); + void getSubqueryProjectionColumns(SubqueriesProjectionColumns & all_subquery_projection_columns, + String & qualified_name_prefix, const ASTPtr & subquery); - void getSubqueryProjectionColumns(IAST * subquery, SubqueriesProjectionColumns & all_subquery_projection_columns, ASTs & output_projections); + ASTs getSelectQueryProjectionColumns(ASTPtr & ast); + + std::vector getSelectTablesExpression(ASTSelectQuery * select_query); + + ASTs evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk); }; } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index e1e02af96e4..524906bddfe 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -272,7 +272,7 @@ struct Settings M(SettingBool, log_query_settings, true, "Log query settings into the query_log.") \ M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table.") \ M(SettingString, send_logs_level, "none", "Send server text logs with specified minumum level to client. Valid values: 'trace', 'debug', 'info', 'warning', 'error', 'none'") \ - M(SettingBool, enable_optimize_predicate_expression, 0, "If it is set to true, optimize predicates to subqueries.") \ + M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.") \ \ M(SettingUInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.") \ M(SettingBool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. 
Otherwise, will create several shared dictionaries.") \ diff --git a/dbms/src/Interpreters/evaluateQualified.cpp b/dbms/src/Interpreters/evaluateQualified.cpp new file mode 100644 index 00000000000..262cbf6d661 --- /dev/null +++ b/dbms/src/Interpreters/evaluateQualified.cpp @@ -0,0 +1,160 @@ +#include +#include +#include + +namespace DB +{ + +/// Checks that ast is ASTIdentifier and remove num_qualifiers_to_strip components from left. +/// Example: 'database.table.name' -> (num_qualifiers_to_strip = 2) -> 'name'. +void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip) +{ + ASTIdentifier * identifier = typeid_cast(ast.get()); + + if (!identifier) + throw DB::Exception("ASTIdentifier expected for stripIdentifier", DB::ErrorCodes::LOGICAL_ERROR); + + if (num_qualifiers_to_strip) + { + size_t num_components = identifier->children.size(); + + /// plain column + if (num_components - num_qualifiers_to_strip == 1) + { + DB::String node_alias = identifier->tryGetAlias(); + ast = identifier->children.back(); + if (!node_alias.empty()) + ast->setAlias(node_alias); + } + else + /// nested column + { + identifier->children.erase(identifier->children.begin(), identifier->children.begin() + num_qualifiers_to_strip); + DB::String new_name; + for (const auto & child : identifier->children) + { + if (!new_name.empty()) + new_name += '.'; + new_name += static_cast(*child.get()).name; + } + identifier->name = new_name; + } + } +} + + +DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression, + const Context & context) +{ + DatabaseAndTableWithAlias database_and_table_with_alias; + + if (table_expression.database_and_table_name) + { + const auto & identifier = static_cast(*table_expression.database_and_table_name); + + database_and_table_with_alias.alias = identifier.tryGetAlias(); + + if (table_expression.database_and_table_name->children.empty()) + { + database_and_table_with_alias.database = context.getCurrentDatabase(); + database_and_table_with_alias.table = identifier.name; + } + else + { + if (table_expression.database_and_table_name->children.size() != 2) + throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); + + database_and_table_with_alias.database = static_cast(*identifier.children[0]).name; + database_and_table_with_alias.table = static_cast(*identifier.children[1]).name; + } + } + else if (table_expression.table_function) + { + database_and_table_with_alias.alias = table_expression.table_function->tryGetAlias(); + } + else if (table_expression.subquery) + { + database_and_table_with_alias.alias = table_expression.subquery->tryGetAlias(); + } + else + throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); + + return database_and_table_with_alias; +} + +/// Get the number of components of identifier which correspond to 'alias.', 'table.' or 'database.table.' from names.
+size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier, + const DatabaseAndTableWithAlias & names) +{ + size_t num_qualifiers_to_strip = 0; + + auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; + + /// It is compound identifier + if (!identifier.children.empty()) + { + size_t num_components = identifier.children.size(); + + /// database.table.column + if (num_components >= 3 + && !names.database.empty() + && get_identifier_name(identifier.children[0]) == names.database + && get_identifier_name(identifier.children[1]) == names.table) + { + num_qualifiers_to_strip = 2; + } + + /// table.column or alias.column. If num_components > 2, it is like table.nested.column. + if (num_components >= 2 + && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) + || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) + { + num_qualifiers_to_strip = 1; + } + } + + return num_qualifiers_to_strip; +} + +std::pair getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier) +{ + std::pair res; + res.second = identifier.name; + if (!identifier.children.empty()) + { + if (identifier.children.size() != 2) + throw Exception("Qualified table name could have only two components", ErrorCodes::LOGICAL_ERROR); + + res.first = typeid_cast(*identifier.children[0]).name; + res.second = typeid_cast(*identifier.children[1]).name; + } + return res; +} + +String DatabaseAndTableWithAlias::getQualifiedNamePrefix() const +{ + return (!alias.empty() ? alias : (database + '.' + table)) + '.'; +} + +void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const +{ + if (auto identifier = typeid_cast(ast.get())) + { + String prefix = getQualifiedNamePrefix(); + identifier->name.insert(identifier->name.begin(), prefix.begin(), prefix.end()); + + Names qualifiers; + if (!alias.empty()) + qualifiers.push_back(alias); + else + { + qualifiers.push_back(database); + qualifiers.push_back(table); + } + + for (const auto & qualifier : qualifiers) + identifier->children.emplace_back(std::make_shared(qualifier)); + } +} + +} \ No newline at end of file diff --git a/dbms/src/Interpreters/evaluateQualified.h b/dbms/src/Interpreters/evaluateQualified.h new file mode 100644 index 00000000000..01f1dc8f9a6 --- /dev/null +++ b/dbms/src/Interpreters/evaluateQualified.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +struct DatabaseAndTableWithAlias +{ + String database; + String table; + String alias; + + /// "alias." or "database.table." if alias is empty + String getQualifiedNamePrefix() const; + + /// If ast is ASTIdentifier, prepend getQualifiedNamePrefix() to its name.
+    void makeQualifiedName(const ASTPtr & ast) const;
+};
+
+void stripIdentifier(DB::ASTPtr & ast, size_t num_qualifiers_to_strip);
+
+DatabaseAndTableWithAlias getTableNameWithAliasFromTableExpression(const ASTTableExpression & table_expression,
+                                                                   const Context & context);
+
+size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifier & identifier,
+                                                              const DatabaseAndTableWithAlias & names);
+
+std::pair<String, String> getDatabaseAndTableNameFromIdentifier(const ASTIdentifier & identifier);
+
+}
\ No newline at end of file
diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference
index c3cb55cdac6..7d03801e78f 100644
--- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference
+++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference
@@ -1,15 +1,23 @@ +-------Query that previously worked but now doesn\'t work.------- +-------Not need optimize predicate, but it works.------- 1 1 1 -2000-01-01 1 test string 1 1 +-------Need push down------- 1 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -2000-01-01 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -1 test string 1 1 test string 1 -1 test string 1 1 test string 1 -test string 1 1 1 -test string 1 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +1 2000-01-01 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 +1 2000-01-01 2000-01-01 1 test string 1 1 +-------Push to having expression, need check.-------
diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql
index b884d9feae7..156eebaf16b 100644
--- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql
+++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql
@@ -1,63 +1,56 @@
 SET send_logs_level = 'none';
 DROP TABLE IF EXISTS test.test;
-DROP TABLE IF EXISTS test.test_union_1;
-DROP TABLE IF EXISTS test.test_union_2;
-DROP TABLE IF EXISTS test.test_join_1;
-DROP TABLE IF EXISTS test.test_join_2;
-
 CREATE TABLE test.test(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192);
-CREATE TABLE test.test_union_1(date_1 Date, id_1 Int8, name_1 String, value_1 Int64) ENGINE = MergeTree(date_1, (id_1, date_1), 8192);
-CREATE TABLE test.test_union_2(date_2 Date, id_2 Int8, name_2 String, value_2 Int64) ENGINE = MergeTree(date_2, (id_2, date_2), 8192);
-CREATE TABLE test.test_join_1(date_1 Date, id_1 Int8, name_1 String, value_1 Int64) ENGINE = MergeTree(date_1, (id_1, date_1), 8192);
-CREATE TABLE test.test_join_2(date_2 Date, id_2 Int8, name_2 String, value_2 Int64) ENGINE = MergeTree(date_2, (id_2, date_2), 8192);
-
 INSERT INTO test.test VALUES('2000-01-01', 1, 'test string 1', 1);
 INSERT INTO test.test VALUES('2000-01-01', 2, 'test string 2', 2);
-INSERT INTO test.test_union_1 VALUES('2000-01-01', 1, 'test string 1', 1);
-INSERT INTO test.test_union_1 VALUES('2000-01-01', 2, 'test string 2', 2);
-INSERT INTO test.test_union_2 VALUES('2000-01-01', 1, 'test string 1', 1);
-INSERT INTO test.test_union_2 VALUES('2000-01-01', 2, 'test string 2', 2);
-INSERT INTO test.test_join_1 VALUES('2000-01-01', 1, 'test string 1', 1);
-INSERT INTO test.test_join_1 VALUES('2000-01-01', 2, 'test string 2', 2);
-INSERT INTO test.test_join_2 VALUES('2000-01-01', 1, 'test string 1', 1);
-INSERT INTO test.test_join_2 VALUES('2000-01-01', 2, 'test string 2', 2);
-
 SET enable_optimize_predicate_expression = 1;
 
--- Query that previously worked but now doesn't work.
+SELECT '-------Query that previously worked but now doesn\'t work.-------';
 SELECT * FROM (SELECT 1) WHERE `1` = 1; -- { serverError 47 }
-SELECT 1; -- Not need push down, but it works.
+SELECT '-------Not need optimize predicate, but it works.-------';
+SELECT 1;
 SELECT 1 AS id WHERE id = 1;
 SELECT arrayJoin([1,2,3]) AS id WHERE id = 1;
-SELECT * FROM (SELECT * FROM test.test) WHERE id = 1;
--- Need push down
+SELECT '-------Need push down-------';
 SELECT * FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
 SELECT id FROM (SELECT arrayJoin([1, 2, 3]) AS id) WHERE id = 1;
 SELECT date, id, name, value FROM (SELECT date, name, value,min(id) AS id FROM test.test GROUP BY date, name, value) WHERE id = 1;
-
 SET force_primary_key = 1;
-SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1;
-SELECT date, id FROM (SELECT id, date, min(value) FROM test.test GROUP BY id, date) WHERE id = 1;
-SELECT date_1, id_1, name_1, value_1 FROM (SELECT date_1, id_1, name_1, value_1 FROM test.test_union_1 UNION ALL SELECT date_2, id_2, name_2, value_2 FROM test.test_union_2) WHERE id_1 = 1;
-SELECT * FROM (SELECT id_1, name_1 AS name FROM test.test_join_1) ANY LEFT JOIN (SELECT id_2, name_2 AS name FROM test.test_join_2) USING name WHERE id_1 = 1 AND id_2 = 1;
-SELECT * FROM (SELECT id_1, name_1 AS name FROM test.test_join_1) ANY LEFT JOIN (SELECT id_2, name_2 AS name FROM test.test_union_2 UNION ALL SELECT id_1, name_1 AS name FROM test.test_union_1) USING name WHERE id_1 = 1 AND id_2 = 1;
-SELECT * FROM (SELECT name_1,id_1 AS id_1, id_1 AS id_2 FROM test.test_union_1 UNION ALL (SELECT name,id_1,id_2 FROM (SELECT name_1 AS name, id_1 FROM test.test_join_1) ANY INNER JOIN (SELECT name_2 AS name, id_2 FROM test.test_join_2) USING (name))) WHERE id_1 = 1 AND id_2 = 1;
+-- Optimize predicate expression with asterisk
+SELECT * FROM (SELECT * FROM test.test) WHERE id = 1;
+-- Optimize predicate expression with asterisk and nested subquery
+SELECT * FROM (SELECT * FROM (SELECT * FROM test.test)) WHERE id = 1;
+-- Optimize predicate expression with qualified asterisk
+SELECT * FROM (SELECT b.* FROM (SELECT * FROM test.test) AS b) WHERE id = 1;
+-- Optimize predicate expression without asterisk
+SELECT * FROM (SELECT date, id, name, value FROM test.test) WHERE id = 1;
+-- Optimize predicate expression without asterisk and contains nested subquery
+SELECT * FROM (SELECT date, id, name, value FROM (SELECT date, id, name, value FROM test.test)) WHERE id = 1;
+-- Optimize predicate expression with qualified
+SELECT * FROM (SELECT * FROM test.test) AS b WHERE b.id = 1;
+-- Optimize predicate expression with qualified and nested subquery
+SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) AS a) AS b WHERE b.id = 1;
+-- Optimize predicate expression with aggregate function
+SELECT * FROM (SELECT id, date, min(value) AS value FROM test.test GROUP BY id, date) WHERE id = 1;
--- TODO This should work:
-SELECT * FROM (SELECT * FROM test.test) WHERE id = 1; -- { serverError 277 }
+-- Optimize predicate expression with union all query
+SELECT * FROM (SELECT * FROM test.test UNION ALL SELECT * FROM test.test) WHERE id = 1;
+-- Optimize predicate expression with join query
+SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id WHERE id = 1;
+-- Optimize predicate expression with join and nested subquery
+SELECT * FROM (SELECT * FROM (SELECT * FROM test.test) ANY LEFT JOIN (SELECT * FROM test.test) USING id) WHERE id = 1;
+-- Optimize predicate expression with join query and qualified
+SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.id = 1;
+SELECT '-------Push to having expression, need check.-------';
 SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 }
 
 DROP TABLE IF EXISTS test.test;
-DROP TABLE IF EXISTS test.test_union_1;
-DROP TABLE IF EXISTS test.test_union_2;
-DROP TABLE IF EXISTS test.test_join_1;
-DROP TABLE IF EXISTS test.test_join_2;
From a55b9623c5d09c06a620b3c2941ea2f8afe975ad Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 24 Aug 2018 05:54:02 +0300
Subject: [PATCH 142/192] Fixed arrayDistinct function [#CLICKHOUSE-3928]

---
 dbms/src/Functions/FunctionsArray.cpp | 144 ++++++++++--------
 dbms/src/Functions/FunctionsArray.h | 4 +-
 .../00691_array_distinct.reference | 22 +++
 .../0_stateless/00691_array_distinct.sql | 14 ++
 4 files changed, 119 insertions(+), 65 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00691_array_distinct.reference
 create mode 100644 dbms/tests/queries/0_stateless/00691_array_distinct.sql

diff --git a/dbms/src/Functions/FunctionsArray.cpp b/dbms/src/Functions/FunctionsArray.cpp
index bba2ad8f3a9..b4436ff0608 100644
--- a/dbms/src/Functions/FunctionsArray.cpp
+++ b/dbms/src/Functions/FunctionsArray.cpp
@@ -945,14 +945,13 @@ void FunctionArrayEnumerate::executeImpl(Block & block, const ColumnNumbers & ar
     ColumnUInt32::Container & res_values = res_nested->getData();
     res_values.resize(array->getData().size());
-    size_t prev_off = 0;
-    for (size_t i = 0; i < offsets.size(); ++i)
+    ColumnArray::Offset prev_off = 0;
+    for (ColumnArray::Offset i = 0; i < offsets.size(); ++i)
     {
-        size_t off = offsets[i];
-        for (size_t j = prev_off; j < off; ++j)
-        {
+        ColumnArray::Offset off = offsets[i];
+        for (ColumnArray::Offset j = prev_off; j < off; ++j)
             res_values[j] = j - prev_off + 1;
-        }
+
         prev_off = off;
     }
@@ -1101,13 +1100,13 @@ bool FunctionArrayUniq::executeNumber(const ColumnArray * array, const IColumn *
         null_map_data = &static_cast<const ColumnUInt8 *>(null_map)->getData();
     Set set;
-    size_t prev_off = 0;
+    ColumnArray::Offset prev_off = 0;
     for (size_t i = 0; i < offsets.size(); ++i)
     {
         set.clear();
         bool found_null = false;
-        size_t off = offsets[i];
-        for (size_t j = prev_off; j < off; ++j)
+        ColumnArray::Offset off = offsets[i];
+        for (ColumnArray::Offset j = prev_off; j < off; ++j)
         {
             if (null_map_data && ((*null_map_data)[j] == 1))
                 found_null = true;
@@ -1147,13 +1146,13 @@ bool FunctionArrayUniq::executeString(const ColumnArray * array, const IColumn *
         null_map_data = &static_cast<const ColumnUInt8 *>(null_map)->getData();
     Set set;
-    size_t prev_off = 0;
+    ColumnArray::Offset prev_off = 0;
     for (size_t i = 0; i < offsets.size(); ++i)
     {
         set.clear();
         bool found_null = false;
-        size_t off = offsets[i];
-        for (size_t j = prev_off; j < off; ++j)
+        ColumnArray::Offset off = offsets[i];
+        for (ColumnArray::Offset j = prev_off; j < off; ++j)
         {
             if (null_map_data && ((*null_map_data)[j] == 1))
                 found_null = true;
@@ -1209,26 +1208,26 @@ bool FunctionArrayUniq::execute128bit(
     /// Each binary blob is inserted into a hash table.
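    /// Sketch of the idea for nullable columns (see the bitmap code below): bit i of
    /// a KeysNullMap is set when column i is NULL at the current array offset, and that
    /// bitmap is packed with the values, so a NULL and a default value of the same
    /// type end up as distinct keys and are counted separately.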
/// Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) + ColumnArray::Offset prev_off = 0; + for (ColumnArray::Offset i = 0; i < offsets.size(); ++i) { set.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + ColumnArray::Offset off = offsets[i]; + for (ColumnArray::Offset j = prev_off; j < off; ++j) { if (has_nullable_columns) { KeysNullMap bitmap{}; - for (size_t i = 0; i < columns.size(); ++i) + for (ColumnArray::Offset i = 0; i < columns.size(); ++i) { if (null_maps[i]) { const auto & null_map = static_cast(*null_maps[i]).getData(); if (null_map[j] == 1) { - size_t bucket = i / 8; - size_t offset = i % 8; + ColumnArray::Offset bucket = i / 8; + ColumnArray::Offset offset = i % 8; bitmap[bucket] |= UInt8(1) << offset; } } @@ -1257,12 +1256,12 @@ void FunctionArrayUniq::executeHashed( HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) + ColumnArray::Offset prev_off = 0; + for (ColumnArray::Offset i = 0; i < offsets.size(); ++i) { set.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + ColumnArray::Offset off = offsets[i]; + for (ColumnArray::Offset j = prev_off; j < off; ++j) set.insert(hash128(j, count, columns)); res_values[i] = set.size(); @@ -1308,9 +1307,6 @@ void FunctionArrayDistinct::executeImpl(Block & block, const ColumnNumbers & arg const IColumn & src_data = array->getData(); const ColumnArray::Offsets & offsets = array->getOffsets(); - ColumnRawPtrs original_data_columns; - original_data_columns.push_back(&src_data); - IColumn & res_data = res.getData(); ColumnArray::Offsets & res_offsets = res.getOffsets(); @@ -1339,13 +1335,14 @@ void FunctionArrayDistinct::executeImpl(Block & block, const ColumnNumbers & arg || executeNumber(*inner_col, offsets, res_data, res_offsets, nullable_col) || executeNumber(*inner_col, offsets, res_data, res_offsets, nullable_col) || executeString(*inner_col, offsets, res_data, res_offsets, nullable_col))) - executeHashed(offsets, original_data_columns, res_data, res_offsets, nullable_col); + executeHashed(*inner_col, offsets, res_data, res_offsets, nullable_col); block.getByPosition(result).column = std::move(res_ptr); } template -bool FunctionArrayDistinct::executeNumber(const IColumn & src_data, +bool FunctionArrayDistinct::executeNumber( + const IColumn & src_data, const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, @@ -1364,9 +1361,7 @@ bool FunctionArrayDistinct::executeNumber(const IColumn & src_data, const PaddedPODArray * src_null_map = nullptr; if (nullable_col) - { src_null_map = &static_cast(&nullable_col->getNullMapColumn())->getData(); - } using Set = ClearableHashSet, @@ -1374,22 +1369,31 @@ bool FunctionArrayDistinct::executeNumber(const IColumn & src_data, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; Set set; - size_t prev_off = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + + ColumnArray::Offset prev_src_offset = 0; + ColumnArray::Offset res_offset = 0; + + for (ColumnArray::Offset i = 0; i < src_offsets.size(); ++i) { set.clear(); - size_t off = src_offsets[i]; - for (size_t j = prev_off; j < off; ++j) + + ColumnArray::Offset curr_src_offset = src_offsets[i]; + for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { - if ((set.find(values[j]) == set.end()) && (!nullable_col || (*src_null_map)[j] == 0)) + if (nullable_col && 
(*src_null_map)[j]) + continue; + + if (set.find(values[j]) == set.end()) { res_data.emplace_back(values[j]); set.insert(values[j]); } } - res_offsets.emplace_back(set.size() + prev_off); - prev_off = off; + res_offset += set.size(); + res_offsets.emplace_back(res_offset); + + prev_src_offset = curr_src_offset; } return true; } @@ -1404,9 +1408,7 @@ bool FunctionArrayDistinct::executeString( const ColumnString * src_data_concrete = checkAndGetColumn(&src_data); if (!src_data_concrete) - { return false; - } ColumnString & res_data_column_string = typeid_cast(res_data_col); @@ -1418,70 +1420,86 @@ bool FunctionArrayDistinct::executeString( const PaddedPODArray * src_null_map = nullptr; if (nullable_col) - { src_null_map = &static_cast(&nullable_col->getNullMapColumn())->getData(); - } Set set; - size_t prev_off = 0; - for (size_t i = 0; i < src_offsets.size(); ++i) + + ColumnArray::Offset prev_src_offset = 0; + ColumnArray::Offset res_offset = 0; + + for (ColumnArray::Offset i = 0; i < src_offsets.size(); ++i) { set.clear(); - size_t off = src_offsets[i]; - for (size_t j = prev_off; j < off; ++j) + + ColumnArray::Offset curr_src_offset = src_offsets[i]; + for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { + if (nullable_col && (*src_null_map)[j]) + continue; + StringRef str_ref = src_data_concrete->getDataAt(j); - if (set.find(str_ref) == set.end() && (!nullable_col || (*src_null_map)[j] == 0)) + if (set.find(str_ref) == set.end()) { set.insert(str_ref); res_data_column_string.insertData(str_ref.data, str_ref.size); } } - res_offsets.emplace_back(set.size() + prev_off); - prev_off = off; + res_offset += set.size(); + res_offsets.emplace_back(res_offset); + + prev_src_offset = curr_src_offset; } return true; } void FunctionArrayDistinct::executeHashed( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, + const IColumn & src_data, + const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, const ColumnNullable * nullable_col) { - size_t count = columns.size(); - using Set = ClearableHashSet, HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; const PaddedPODArray * src_null_map = nullptr; if (nullable_col) - { src_null_map = &static_cast(&nullable_col->getNullMapColumn())->getData(); - } Set set; - size_t prev_off = 0; - for (size_t i = 0; i < offsets.size(); ++i) + + ColumnArray::Offset prev_src_offset = 0; + ColumnArray::Offset res_offset = 0; + + for (ColumnArray::Offset i = 0; i < src_offsets.size(); ++i) { set.clear(); - size_t off = offsets[i]; - for (size_t j = prev_off; j < off; ++j) + + ColumnArray::Offset curr_src_offset = src_offsets[i]; + for (ColumnArray::Offset j = prev_src_offset; j < curr_src_offset; ++j) { - auto hash = hash128(j, count, columns); - if (set.find(hash) == set.end() && (!nullable_col || (*src_null_map)[j] == 0)) + if (nullable_col && (*src_null_map)[j]) + continue; + + UInt128 hash; + SipHash hash_function; + src_data.updateHashWithValue(j, hash_function); + hash_function.get128(reinterpret_cast(&hash)); + + if (set.find(hash) == set.end()) { set.insert(hash); - res_data_col.insertFrom(*columns[0], j); + res_data_col.insertFrom(src_data, j); } } - res_offsets.emplace_back(set.size() + prev_off); - prev_off = off; + res_offset += set.size(); + res_offsets.emplace_back(res_offset); + + prev_src_offset = curr_src_offset; } } diff --git a/dbms/src/Functions/FunctionsArray.h b/dbms/src/Functions/FunctionsArray.h index bab944a6bd1..c90f6a7c6bf 
100644 --- a/dbms/src/Functions/FunctionsArray.h +++ b/dbms/src/Functions/FunctionsArray.h @@ -1252,8 +1252,8 @@ private: const ColumnNullable * nullable_col); void executeHashed( - const ColumnArray::Offsets & offsets, - const ColumnRawPtrs & columns, + const IColumn & src_data, + const ColumnArray::Offsets & src_offsets, IColumn & res_data_col, ColumnArray::Offsets & res_offsets, const ColumnNullable * nullable_col); diff --git a/dbms/tests/queries/0_stateless/00691_array_distinct.reference b/dbms/tests/queries/0_stateless/00691_array_distinct.reference new file mode 100644 index 00000000000..ce8b4c2ec7a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00691_array_distinct.reference @@ -0,0 +1,22 @@ +[0] +[0] +[[0]] +[[0]] +[''] +[''] +[0] +[0] +[0,1] +[0,1] +['','Hello'] +['','Hello'] +[[0]] +[[0]] +[''] +[''] +[0] +[0] +[0,1] +[0,1] +['','Hello'] +['','Hello'] diff --git a/dbms/tests/queries/0_stateless/00691_array_distinct.sql b/dbms/tests/queries/0_stateless/00691_array_distinct.sql new file mode 100644 index 00000000000..59a523fc503 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00691_array_distinct.sql @@ -0,0 +1,14 @@ +SELECT arrayDistinct(arrayMap(x -> 0, range(2))) FROM numbers(2); + +SELECT arrayDistinct(materialize([[0], [0]])) FROM numbers(2); +SELECT arrayDistinct(materialize(['', '', ''])) FROM numbers(2); +SELECT arrayDistinct(materialize([0, 0, 0])) FROM numbers(2); +SELECT arrayDistinct(materialize([0, 1, 1, 0])) FROM numbers(2); +SELECT arrayDistinct(materialize(['', 'Hello', ''])) FROM numbers(2); + + +SELECT arrayDistinct(materialize([[0], [0]])) FROM numbers(2); +SELECT arrayDistinct(materialize(['', NULL, ''])) FROM numbers(2); +SELECT arrayDistinct(materialize([0, NULL, 0])) FROM numbers(2); +SELECT arrayDistinct(materialize([0, 1, NULL, 0])) FROM numbers(2); +SELECT arrayDistinct(materialize(['', 'Hello', NULL])) FROM numbers(2); From bf517d5523e852b3e3c67712bb0897dc6f2788fe Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 24 Aug 2018 11:32:20 +0800 Subject: [PATCH 143/192] ISSUES-863 fix failure tests --- .../PredicateExpressionsOptimizer.cpp | 21 +++++++++++-------- dbms/src/Interpreters/evaluateQualified.cpp | 2 +- .../00597_push_down_predicate.reference | 1 + .../0_stateless/00597_push_down_predicate.sql | 3 +++ 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index d3e5fa05cee..0f6c8c18f3e 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -155,7 +155,7 @@ bool PredicateExpressionsOptimizer::cannotPushDownOuterPredicate( const ProjectionsWithAliases & subquery_projection_columns, ASTSelectQuery * subquery, IdentifiersWithQualifiedNameSet & outer_predicate_dependencies, bool & is_prewhere, OptimizeKind & optimize_kind) { - if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_offset || subquery->with_expression_list) + if (subquery->final() || subquery->limit_by_expression_list || subquery->limit_length || subquery->with_expression_list) return true; for (auto & predicate_dependency : outer_predicate_dependencies) @@ -259,14 +259,17 @@ void PredicateExpressionsOptimizer::getSubqueryProjectionColumns(SubqueriesProje ProjectionsWithAliases subquery_projections; auto select_projection_columns = getSelectQueryProjectionColumns(select_without_union_query); - if (select_with_union_projections.empty()) - 
select_with_union_projections = select_projection_columns; + if (!select_projection_columns.empty()) + { + if (select_with_union_projections.empty()) + select_with_union_projections = select_projection_columns; - for (size_t i = 0; i < select_projection_columns.size(); i++) - subquery_projections.emplace_back(std::pair(select_projection_columns[i], - qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName())); + for (size_t i = 0; i < select_projection_columns.size(); i++) + subquery_projections.emplace_back(std::pair(select_projection_columns[i], + qualified_name_prefix + select_with_union_projections[i]->getAliasOrColumnName())); - all_subquery_projection_columns.insert(std::pair(select_without_union_query.get(), subquery_projections)); + all_subquery_projection_columns.insert(std::pair(select_without_union_query.get(), subquery_projections)); + } } } @@ -292,10 +295,10 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast return projection_columns; } -ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery *select_query, const ASTPtr &asterisk) +ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk) { if (!select_query->tables || select_query->tables->children.empty()) - throw Exception("Logical error: The asterisk cannot be replaced, because there is no table.", ErrorCodes::LOGICAL_ERROR); + return {}; std::vector tables_expression = getSelectTablesExpression(select_query); diff --git a/dbms/src/Interpreters/evaluateQualified.cpp b/dbms/src/Interpreters/evaluateQualified.cpp index 262cbf6d661..0c7604560a9 100644 --- a/dbms/src/Interpreters/evaluateQualified.cpp +++ b/dbms/src/Interpreters/evaluateQualified.cpp @@ -157,4 +157,4 @@ void DatabaseAndTableWithAlias::makeQualifiedName(const ASTPtr & ast) const } } -} \ No newline at end of file +} diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index 7d03801e78f..ae742fd40c5 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -21,3 +21,4 @@ 2000-01-01 1 test string 1 1 1 2000-01-01 2000-01-01 1 test string 1 1 -------Push to having expression, need check.------- +-------Compatibility test------- diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql index 156eebaf16b..2a58e3d2839 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -53,4 +53,7 @@ SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers SELECT '-------Push to having expression, need check.-------'; SELECT id FROM (SELECT min(id) AS id FROM test.test) WHERE id = 1; -- { serverError 277 } +SELECT '-------Compatibility test-------'; +SELECT * FROM (SELECT 1 AS id, toDate('2000-01-01') AS date FROM system.numbers LIMIT 1) ANY LEFT JOIN (SELECT * FROM test.test) AS b USING date WHERE b.date = toDate('2000-01-01'); -- {serverError 47} + DROP TABLE IF EXISTS test.test; From f4813a47f827613781a3a9984e4b2fb39b1538f8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 08:20:18 +0300 Subject: [PATCH 144/192] Whitespaces #2913 --- dbms/src/Dictionaries/DictionaryBlockInputStream.h | 7 ++++--- .../src/Dictionaries/RangeDictionaryBlockInputStream.h | 10 
+++++----- dbms/src/Dictionaries/RangeHashedDictionary.cpp | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index 12cfcca58d9..0fa5b5be63c 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -1,4 +1,5 @@ #pragma once + #include #include #include @@ -102,7 +103,7 @@ private: template ColumnPtr getColumnFromStringAttribute(Getter getter, const PaddedPODArray & ids, const Columns & keys, const DataTypes & data_types, - const DictionaryAttribute& attribute, const DictionaryType& dictionary) const; + const DictionaryAttribute & attribute, const DictionaryType& dictionary) const; ColumnPtr getColumnFromIds(const PaddedPODArray & ids) const; void fillKeyColumns(const std::vector & keys, size_t start, size_t size, @@ -287,7 +288,7 @@ Block DictionaryBlockInputStream::fillBlock( for (const auto idx : ext::range(0, structure.attributes.size())) { - const DictionaryAttribute& attribute = structure.attributes[idx]; + const DictionaryAttribute & attribute = structure.attributes[idx]; if (names.find(attribute.name) != names.end()) { ColumnPtr column; @@ -363,7 +364,7 @@ template ColumnPtr DictionaryBlockInputStream::getColumnFromStringAttribute( Getter getter, const PaddedPODArray & ids_to_fill, const Columns & keys, const DataTypes & data_types, - const DictionaryAttribute& attribute, const DictionaryType & dict) const + const DictionaryAttribute & attribute, const DictionaryType & dict) const { auto column_string = ColumnString::create(); auto ptr = column_string.get(); diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index 857c9b58cfb..d0596b41513 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -44,9 +44,9 @@ private: template ColumnPtr getColumnFromAttribute(DictionaryGetter getter, const PaddedPODArray & ids, const PaddedPODArray & dates, - const DictionaryAttribute& attribute, const DictionaryType& dictionary) const; + const DictionaryAttribute & attribute, const DictionaryType& dictionary) const; ColumnPtr getColumnFromAttributeString(const PaddedPODArray & ids, const PaddedPODArray & dates, - const DictionaryAttribute& attribute, const DictionaryType& dictionary) const; + const DictionaryAttribute & attribute, const DictionaryType& dictionary) const; template ColumnPtr getColumnFromPODArray(const PaddedPODArray & array) const; @@ -104,7 +104,7 @@ template template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttribute( DictionaryGetter getter, const PaddedPODArray & ids, - const PaddedPODArray & dates, const DictionaryAttribute& attribute, const DictionaryType& dictionary) const + const PaddedPODArray & dates, const DictionaryAttribute & attribute, const DictionaryType& dictionary) const { auto column_vector = ColumnVector::create(ids.size()); (dictionary.*getter)(attribute.name, ids, dates, column_vector->getData()); @@ -114,7 +114,7 @@ ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAtt template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttributeString( const PaddedPODArray & ids, const PaddedPODArray & dates, - const DictionaryAttribute& attribute, const DictionaryType& dictionary) const + const DictionaryAttribute & attribute, const DictionaryType& dictionary) const { auto column_string = 
ColumnString::create(); dictionary.getString(attribute.name, ids, dates, column_string.get()); @@ -183,7 +183,7 @@ Block RangeDictionaryBlockInputStream::fillBlock( for (const auto idx : ext::range(0, structure.attributes.size())) { - const DictionaryAttribute& attribute = structure.attributes[idx]; + const DictionaryAttribute & attribute = structure.attributes[idx]; if (names.find(attribute.name) != names.end()) { ColumnPtr column; diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index fc938143176..fe90fe60b84 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -376,7 +376,7 @@ void RangeHashedDictionary::getIdsAndDates(PaddedPODArray & ids, } template -void RangeHashedDictionary::getIdsAndDates(const Attribute& attribute, PaddedPODArray & ids, +void RangeHashedDictionary::getIdsAndDates(const Attribute & attribute, PaddedPODArray & ids, PaddedPODArray & start_dates, PaddedPODArray & end_dates) const { const HashMap> & attr = *std::get>(attribute.maps); From 27d43fb7ff890f4bbc13f19453ba613ee9d67ff8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 08:21:53 +0300 Subject: [PATCH 145/192] Fixed typo #2913 --- dbms/src/Dictionaries/DictionaryBlockInputStream.h | 4 ++-- dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index 0fa5b5be63c..c87e126d3ea 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -30,7 +30,7 @@ template class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase { public: - using DictionatyPtr = std::shared_ptr; + using DictionaryPtr = std::shared_ptr; DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, PaddedPODArray && ids, const Names & column_names); @@ -109,7 +109,7 @@ private: void fillKeyColumns(const std::vector & keys, size_t start, size_t size, const DictionaryStructure & dictionary_structure, ColumnsWithTypeAndName & columns) const; - DictionatyPtr dictionary; + DictionaryPtr dictionary; Names column_names; PaddedPODArray ids; ColumnsWithTypeAndName key_columns; diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index d0596b41513..063a3ccda3c 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -22,10 +22,10 @@ template class RangeDictionaryBlockInputStream : public DictionaryBlockInputStreamBase { public: - using DictionatyPtr = std::shared_ptr; + using DictionaryPtr = std::shared_ptr; RangeDictionaryBlockInputStream( - DictionatyPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray && ids, + DictionaryPtr dictionary, size_t max_block_size, const Names & column_names, PaddedPODArray && ids, PaddedPODArray && start_dates, PaddedPODArray && end_dates); String getName() const override @@ -62,7 +62,7 @@ private: PaddedPODArray makeDateKey( const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const; - DictionatyPtr dictionary; + DictionaryPtr dictionary; Names column_names; PaddedPODArray ids; PaddedPODArray start_dates; @@ -72,7 +72,7 @@ private: template RangeDictionaryBlockInputStream::RangeDictionaryBlockInputStream( - 
DictionatyPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray && ids, + DictionaryPtr dictionary, size_t max_column_size, const Names & column_names, PaddedPODArray && ids, PaddedPODArray && start_dates, PaddedPODArray && end_dates) : DictionaryBlockInputStreamBase(ids.size(), max_column_size), dictionary(dictionary), column_names(column_names), From 6043ea02de15cdfb8f8e8d5047e8f6e9b9e1b105 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 08:25:00 +0300 Subject: [PATCH 146/192] Style #2913 --- .../AggregateFunctionHistogram.h | 6 +++--- dbms/src/Columns/ColumnUnique.h | 2 +- dbms/src/Common/ZooKeeper/ZooKeeperHolder.h | 4 ++-- dbms/src/Common/tests/lru_cache.cpp | 2 +- dbms/src/Dictionaries/DictionaryBlockInputStream.h | 8 ++++---- .../Embedded/GeodataProviders/INamesProvider.h | 2 +- .../Embedded/GeodataProviders/NamesProvider.h | 2 +- dbms/src/Dictionaries/FlatDictionary.h | 2 +- dbms/src/Dictionaries/MongoDBDictionarySource.cpp | 2 +- .../Dictionaries/RangeDictionaryBlockInputStream.h | 14 +++++++------- dbms/src/Interpreters/ExpressionJIT.cpp | 4 ++-- dbms/src/Interpreters/tests/compiler_test.cpp | 2 +- 12 files changed, 25 insertions(+), 25 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h index be149c4898d..b532a373270 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -48,7 +48,7 @@ private: Mean mean; Weight weight; - WeightedValue operator+ (const WeightedValue& other) + WeightedValue operator+ (const WeightedValue & other) { return {mean + other.weight * (other.mean - mean) / (other.weight + weight), other.weight + weight}; } @@ -263,7 +263,7 @@ public: compress(max_bins); } - void merge(const AggregateFunctionHistogramData& other, UInt32 max_bins) + void merge(const AggregateFunctionHistogramData & other, UInt32 max_bins) { lower_bound = std::min(lower_bound, other.lower_bound); upper_bound = std::max(lower_bound, other.upper_bound); @@ -354,7 +354,7 @@ public: void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { - auto& data = this->data(const_cast(place)); + auto & data = this->data(const_cast(place)); auto & to_array = static_cast(to); ColumnArray::Offsets & offsets_to = to_array.getOffsets(); diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index 931a8478516..dbed6ef499f 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -68,7 +68,7 @@ public: int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override { - auto & column_unique = static_cast(rhs); + auto & column_unique = static_cast(rhs); return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint); } diff --git a/dbms/src/Common/ZooKeeper/ZooKeeperHolder.h b/dbms/src/Common/ZooKeeper/ZooKeeperHolder.h index d9e86d86406..d5792b8fde6 100644 --- a/dbms/src/Common/ZooKeeper/ZooKeeperHolder.h +++ b/dbms/src/Common/ZooKeeper/ZooKeeperHolder.h @@ -20,7 +20,7 @@ public: /// вызывать из одного потока - не thread safe template - void init(Args&&... args); + void init(Args &&... args); /// был ли класс инициализирован bool isInitialized() const { return ptr != nullptr; } @@ -76,7 +76,7 @@ private: }; template -void ZooKeeperHolder::init(Args&&... args) +void ZooKeeperHolder::init(Args &&... 
args) { ptr = std::make_shared(std::forward(args)...); } diff --git a/dbms/src/Common/tests/lru_cache.cpp b/dbms/src/Common/tests/lru_cache.cpp index 70c4eed2132..e50d6ad9786 100644 --- a/dbms/src/Common/tests/lru_cache.cpp +++ b/dbms/src/Common/tests/lru_cache.cpp @@ -190,7 +190,7 @@ bool test_concurrent() bool res = true; - auto load_func = [](const std::string& result, std::chrono::seconds sleep_for, bool throw_exc) + auto load_func = [](const std::string & result, std::chrono::seconds sleep_for, bool throw_exc) { std::this_thread::sleep_for(sleep_for); if (throw_exc) diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index c87e126d3ea..c4c470e373c 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -103,7 +103,7 @@ private: template ColumnPtr getColumnFromStringAttribute(Getter getter, const PaddedPODArray & ids, const Columns & keys, const DataTypes & data_types, - const DictionaryAttribute & attribute, const DictionaryType& dictionary) const; + const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; ColumnPtr getColumnFromIds(const PaddedPODArray & ids) const; void fillKeyColumns(const std::vector & keys, size_t start, size_t size, @@ -115,7 +115,7 @@ private: ColumnsWithTypeAndName key_columns; Poco::Logger * logger; Block (DictionaryBlockInputStream::*fillBlockFunction)( - const PaddedPODArray & ids, const Columns& keys, + const PaddedPODArray & ids, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const; Columns data_columns; @@ -135,7 +135,7 @@ private: template DictionaryBlockInputStream::DictionaryBlockInputStream( std::shared_ptr dictionary, size_t max_block_size, - PaddedPODArray && ids, const Names& column_names) + PaddedPODArray && ids, const Names & column_names) : DictionaryBlockInputStreamBase(ids.size(), max_block_size), dictionary(std::static_pointer_cast(dictionary)), column_names(column_names), ids(std::move(ids)), @@ -148,7 +148,7 @@ DictionaryBlockInputStream::DictionaryBlockInputStream( template DictionaryBlockInputStream::DictionaryBlockInputStream( std::shared_ptr dictionary, size_t max_block_size, - const std::vector & keys, const Names& column_names) + const std::vector & keys, const Names & column_names) : DictionaryBlockInputStreamBase(keys.size(), max_block_size), dictionary(std::static_pointer_cast(dictionary)), column_names(column_names), logger(&Poco::Logger::get("DictionaryBlockInputStream")), diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h index 0789f76c862..fb18684b3fa 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h @@ -44,7 +44,7 @@ class IRegionsNamesDataProvider { public: virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource( - const std::string& language) const = 0; + const std::string & language) const = 0; virtual ~IRegionsNamesDataProvider() {} }; diff --git a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h index bc92a6a161c..916dfe38230 100644 --- a/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h +++ b/dbms/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h @@ -44,7 +44,7 @@ public: RegionsNamesDataProvider(const std::string & 
directory_); ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource( - const std::string& language) const override; + const std::string & language) const override; private: std::string getDataFilePath(const std::string & language) const; diff --git a/dbms/src/Dictionaries/FlatDictionary.h b/dbms/src/Dictionaries/FlatDictionary.h index 799789fbf67..e6e5aca8013 100644 --- a/dbms/src/Dictionaries/FlatDictionary.h +++ b/dbms/src/Dictionaries/FlatDictionary.h @@ -187,7 +187,7 @@ private: void resize(Attribute & attribute, const Key id); template - void setAttributeValueImpl(Attribute & attribute, const Key id, const T& value); + void setAttributeValueImpl(Attribute & attribute, const Key id, const T & value); void setAttributeValue(Attribute & attribute, const Key id, const Field & value); diff --git a/dbms/src/Dictionaries/MongoDBDictionarySource.cpp b/dbms/src/Dictionaries/MongoDBDictionarySource.cpp index 81a1e094d84..2fe8c91ac6b 100644 --- a/dbms/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/dbms/src/Dictionaries/MongoDBDictionarySource.cpp @@ -12,7 +12,7 @@ // only after poco // naming conflict: -// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string& value); +// Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); // dbms/src/IO/WriteHelpers.h:146 #define writeCString(s, buf) #include #include diff --git a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h index 063a3ccda3c..6f9e923848e 100644 --- a/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/RangeDictionaryBlockInputStream.h @@ -44,9 +44,9 @@ private: template ColumnPtr getColumnFromAttribute(DictionaryGetter getter, const PaddedPODArray & ids, const PaddedPODArray & dates, - const DictionaryAttribute & attribute, const DictionaryType& dictionary) const; + const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; ColumnPtr getColumnFromAttributeString(const PaddedPODArray & ids, const PaddedPODArray & dates, - const DictionaryAttribute & attribute, const DictionaryType& dictionary) const; + const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; template ColumnPtr getColumnFromPODArray(const PaddedPODArray & array) const; @@ -54,7 +54,7 @@ private: void addSpecialColumn( const std::optional & attribute, DataTypePtr type, const std::string & default_name, const std::unordered_set & column_names, - const PaddedPODArray & values, ColumnsWithTypeAndName& columns) const; + const PaddedPODArray & values, ColumnsWithTypeAndName & columns) const; Block fillBlock(const PaddedPODArray & ids, const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const; @@ -104,7 +104,7 @@ template template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttribute( DictionaryGetter getter, const PaddedPODArray & ids, - const PaddedPODArray & dates, const DictionaryAttribute & attribute, const DictionaryType& dictionary) const + const PaddedPODArray & dates, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const { auto column_vector = ColumnVector::create(ids.size()); (dictionary.*getter)(attribute.name, ids, dates, column_vector->getData()); @@ -114,7 +114,7 @@ ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAtt template ColumnPtr RangeDictionaryBlockInputStream::getColumnFromAttributeString( const PaddedPODArray & ids, const PaddedPODArray & dates, - const DictionaryAttribute & attribute, const DictionaryType& 
dictionary) const + const DictionaryAttribute & attribute, const DictionaryType & dictionary) const { auto column_string = ColumnString::create(); dictionary.getString(attribute.name, ids, dates, column_string.get()); @@ -137,7 +137,7 @@ template template void RangeDictionaryBlockInputStream::addSpecialColumn( const std::optional & attribute, DataTypePtr type, - const std::string& default_name, const std::unordered_set & column_names, + const std::string & default_name, const std::unordered_set & column_names, const PaddedPODArray & values, ColumnsWithTypeAndName & columns) const { std::string name = default_name; @@ -171,7 +171,7 @@ Block RangeDictionaryBlockInputStream::fillBlock( const PaddedPODArray & start_dates, const PaddedPODArray & end_dates) const { ColumnsWithTypeAndName columns; - const DictionaryStructure& structure = dictionary->getStructure(); + const DictionaryStructure & structure = dictionary->getStructure(); std::unordered_set names(column_names.begin(), column_names.end()); diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index eed7209cb33..a6dab436f47 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -114,7 +114,7 @@ static llvm::TargetMachine * getNativeMachine() llvm::SubtargetFeatures features; llvm::StringMap feature_map; if (llvm::sys::getHostCPUFeatures(feature_map)) - for (auto& f : feature_map) + for (auto & f : feature_map) features.AddFeature(f.first(), f.second); llvm::TargetOptions options; return target->createTargetMachine( @@ -545,7 +545,7 @@ public: } }; -static bool isCompilable(llvm::IRBuilderBase & builder, const IFunctionBase& function) +static bool isCompilable(llvm::IRBuilderBase & builder, const IFunctionBase & function) { if (!toNativeType(builder, function.getReturnType())) return false; diff --git a/dbms/src/Interpreters/tests/compiler_test.cpp b/dbms/src/Interpreters/tests/compiler_test.cpp index f0fe667a04f..c56cf5775d6 100644 --- a/dbms/src/Interpreters/tests/compiler_test.cpp +++ b/dbms/src/Interpreters/tests/compiler_test.cpp @@ -28,7 +28,7 @@ int main(int, char **) "catch (const std::runtime_error & e) { std::cout << \"Caught in .so: \" << e.what() << std::endl; throw; }\n" "}" ; - }, [](SharedLibraryPtr&){}); + }, [](SharedLibraryPtr &){}); auto f = lib->template get("_Z1fv"); From 21528697f8e73ad6b68b378ede2f92d04ae009e3 Mon Sep 17 00:00:00 2001 From: zhang2014 Date: Fri, 24 Aug 2018 13:30:49 +0800 Subject: [PATCH 147/192] ISSUES-863 turn off enable_optimize_predicate_expression --- dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp | 1 + dbms/src/Interpreters/Settings.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 0f6c8c18f3e..7428585086b 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -297,6 +297,7 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast ASTs PredicateExpressionsOptimizer::evaluateAsterisk(ASTSelectQuery * select_query, const ASTPtr & asterisk) { + /// SELECT *, SELECT dummy, SELECT 1 AS id if (!select_query->tables || select_query->tables->children.empty()) return {}; diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index 524906bddfe..e1e02af96e4 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ 
-272,7 +272,7 @@ struct Settings M(SettingBool, log_query_settings, true, "Log query settings into the query_log.") \ M(SettingBool, log_query_threads, true, "Log query threads into system.query_thread_log table.") \ M(SettingString, send_logs_level, "none", "Send server text logs with specified minumum level to client. Valid values: 'trace', 'debug', 'info', 'warning', 'error', 'none'") \ - M(SettingBool, enable_optimize_predicate_expression, 1, "If it is set to true, optimize predicates to subqueries.") \ + M(SettingBool, enable_optimize_predicate_expression, 0, "If it is set to true, optimize predicates to subqueries.") \ \ M(SettingUInt64, low_cardinality_max_dictionary_size, 8192, "Maximum size (in rows) of shared global dictionary for LowCardinality type.") \ M(SettingBool, low_cardinality_use_single_dictionary_for_part, false, "LowCardinality type serialization setting. If is true, than will use additional keys when global dictionary overflows. Otherwise, will create several shared dictionaries.") \ From 761422da22128ac1bed028d82154e1e6ed35dfcc Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 24 Aug 2018 08:37:06 +0300 Subject: [PATCH 148/192] Miscellaneous #2913 --- .../Dictionaries/DictionaryBlockInputStream.h | 53 ++++++++++++------- .../DictionaryBlockInputStreamBase.cpp | 2 +- .../DictionaryBlockInputStreamBase.h | 3 +- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/dbms/src/Dictionaries/DictionaryBlockInputStream.h b/dbms/src/Dictionaries/DictionaryBlockInputStream.h index c4c470e373c..7d9f8e626e8 100644 --- a/dbms/src/Dictionaries/DictionaryBlockInputStream.h +++ b/dbms/src/Dictionaries/DictionaryBlockInputStream.h @@ -22,9 +22,8 @@ namespace ErrorCodes } -/* - * BlockInputStream implementation for external dictionaries - * read() returns single block consisting of the in-memory contents of the dictionaries +/* BlockInputStream implementation for external dictionaries + * read() returns blocks consisting of the in-memory contents of the dictionaries */ template class DictionaryBlockInputStream : public DictionaryBlockInputStreamBase @@ -34,11 +33,13 @@ public: DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, PaddedPODArray && ids, const Names & column_names); + DictionaryBlockInputStream(std::shared_ptr dictionary, size_t max_block_size, const std::vector & keys, const Names & column_names); using GetColumnsFunction = std::function & attributes)>; + // Used to separate key columns format for storage and view. // Calls get_key_columns_function to get key column for dictionary get fuction call // and get_view_columns_function to get key representation. 
@@ -60,16 +61,15 @@ private: // pointer types to getXXX functions // for single key dictionaries template - using DictionaryGetter = void (DictionaryType::*)( - const std::string &, const PaddedPODArray &, PaddedPODArray &) const; - using DictionaryStringGetter = void (DictionaryType::*)( - const std::string &, const PaddedPODArray &, ColumnString *) const; + using DictionaryGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, PaddedPODArray &) const; + + using DictionaryStringGetter = void (DictionaryType::*)(const std::string &, const PaddedPODArray &, ColumnString *) const; + // for complex complex key dictionaries template - using GetterByKey = void (DictionaryType::*)( - const std::string &, const Columns &, const DataTypes &, PaddedPODArray & out) const; - using StringGetterByKey = void (DictionaryType::*)( - const std::string &, const Columns &, const DataTypes &, ColumnString * out) const; + using GetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, PaddedPODArray & out) const; + + using StringGetterByKey = void (DictionaryType::*)(const std::string &, const Columns &, const DataTypes &, ColumnString * out) const; // call getXXX // for single key dictionaries @@ -77,15 +77,18 @@ private: void callGetter(DictionaryGetter getter, const PaddedPODArray & ids, const Columns & keys, const DataTypes & data_types, Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + template void callGetter(DictionaryStringGetter getter, const PaddedPODArray & ids, const Columns & keys, const DataTypes & data_types, Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + // for complex complex key dictionaries template void callGetter(GetterByKey getter, const PaddedPODArray & ids, const Columns & keys, const DataTypes & data_types, Container & container, const DictionaryAttribute & attribute, const DictionaryType & dictionary) const; + template void callGetter(StringGetterByKey getter, const PaddedPODArray & ids, const Columns & keys, const DataTypes & data_types, @@ -114,10 +117,13 @@ private: PaddedPODArray ids; ColumnsWithTypeAndName key_columns; Poco::Logger * logger; - Block (DictionaryBlockInputStream::*fillBlockFunction)( + + using FillBlockFunction = Block (DictionaryBlockInputStream::*)( const PaddedPODArray & ids, const Columns & keys, const DataTypes & types, ColumnsWithTypeAndName && view) const; + FillBlockFunction fill_block_function; + Columns data_columns; GetColumnsFunction get_key_columns_function; GetColumnsFunction get_view_columns_function; @@ -132,6 +138,7 @@ private: DictionaryKeyType key_type; }; + template DictionaryBlockInputStream::DictionaryBlockInputStream( std::shared_ptr dictionary, size_t max_block_size, @@ -140,7 +147,7 @@ DictionaryBlockInputStream::DictionaryBlockInputStream( dictionary(std::static_pointer_cast(dictionary)), column_names(column_names), ids(std::move(ids)), logger(&Poco::Logger::get("DictionaryBlockInputStream")), - fillBlockFunction(&DictionaryBlockInputStream::fillBlock), + fill_block_function(&DictionaryBlockInputStream::fillBlock), key_type(DictionaryKeyType::Id) { } @@ -152,7 +159,7 @@ DictionaryBlockInputStream::DictionaryBlockInputStream( : DictionaryBlockInputStreamBase(keys.size(), max_block_size), dictionary(std::static_pointer_cast(dictionary)), column_names(column_names), logger(&Poco::Logger::get("DictionaryBlockInputStream")), - 
fillBlockFunction(&DictionaryBlockInputStream::fillBlock), + fill_block_function(&DictionaryBlockInputStream::fillBlock), key_type(DictionaryKeyType::ComplexKey) { const DictionaryStructure & dictionaty_structure = dictionary->getStructure(); @@ -168,13 +175,14 @@ DictionaryBlockInputStream::DictionaryBlockInputStream( : DictionaryBlockInputStreamBase(data_columns.front()->size(), max_block_size), dictionary(std::static_pointer_cast(dictionary)), column_names(column_names), logger(&Poco::Logger::get("DictionaryBlockInputStream")), - fillBlockFunction(&DictionaryBlockInputStream::fillBlock), + fill_block_function(&DictionaryBlockInputStream::fillBlock), data_columns(data_columns), get_key_columns_function(get_key_columns_function), get_view_columns_function(get_view_columns_function), key_type(DictionaryKeyType::Callback) { } + template Block DictionaryBlockInputStream::getBlock(size_t start, size_t length) const { @@ -191,13 +199,15 @@ Block DictionaryBlockInputStream::getBlock(size_t start, si columns.emplace_back(column); view_columns.emplace_back(column, key_column.type, key_column.name); } - return (this->*fillBlockFunction)({}, columns, {}, std::move(view_columns)); + return (this->*fill_block_function)({}, columns, {}, std::move(view_columns)); } + case DictionaryKeyType::Id: { PaddedPODArray block_ids(ids.begin() + start, ids.begin() + start + length); - return (this->*fillBlockFunction)(block_ids, {}, {}, {}); + return (this->*fill_block_function)(block_ids, {}, {}, {}); } + case DictionaryKeyType::Callback: { Columns columns; @@ -215,12 +225,14 @@ Block DictionaryBlockInputStream::getBlock(size_t start, si columns.push_back(key_column.column); types.push_back(key_column.type); } - return (this->*fillBlockFunction)({}, columns, types, std::move(view_with_type_and_name)); + return (this->*fill_block_function)({}, columns, types, std::move(view_with_type_and_name)); } } + throw Exception("Unexpected DictionaryKeyType.", ErrorCodes::LOGICAL_ERROR); } + template template void DictionaryBlockInputStream::callGetter( @@ -261,6 +273,7 @@ void DictionaryBlockInputStream::callGetter( (dict.*getter)(attribute.name, keys, data_types, container); } + template template