From b4440131d00a6242c0404872adaafd499b268454 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 15 Feb 2022 16:25:07 +0800 Subject: [PATCH 01/87] add metrics for clickhouse-local --- src/Client/LocalConnection.cpp | 100 +++++++++++++++++++++++++- src/Client/LocalConnection.h | 11 ++- src/Interpreters/ProfileEventsExt.cpp | 45 ++++++++++++ src/Interpreters/ProfileEventsExt.h | 18 +++++ src/Server/TCPHandler.cpp | 73 +------------------ src/Server/TCPHandler.h | 11 +-- 6 files changed, 180 insertions(+), 78 deletions(-) diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 8ee4b9e1c1f..037111d4b1a 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -6,6 +6,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -18,10 +21,11 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_) +LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_) : WithContext(context_) , session(getContext(), ClientInfo::Interface::LOCAL) , send_progress(send_progress_) + , send_profile_events(send_profile_events_) { /// Authenticate and create a context to execute queries. session.authenticate("default", "", Poco::Net::SocketAddress{}); @@ -58,6 +62,88 @@ void LocalConnection::updateProgress(const Progress & value) state->progress.incrementPiecewiseAtomically(value); } +void LocalConnection::updateProfileEvents(Block & block) +{ + static const NamesAndTypesList column_names_and_types = { + {"host_name", std::make_shared()}, + {"current_time", std::make_shared()}, + {"thread_id", std::make_shared()}, + {"type", ProfileEvents::TypeEnum}, + {"name", std::make_shared()}, + {"value", std::make_shared()}, + }; + + ColumnsWithTypeAndName temp_columns; + for (auto const & name_and_type : column_names_and_types) + temp_columns.emplace_back(name_and_type.type, name_and_type.name); + + using namespace ProfileEvents; + block = Block(std::move(temp_columns)); + MutableColumns columns = block.mutateColumns(); + auto thread_group = CurrentThread::getGroup(); + auto const current_thread_id = CurrentThread::get().thread_id; + std::vector snapshots; + ThreadIdToCountersSnapshot new_snapshots; + ProfileEventsSnapshot group_snapshot; + { + auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); + snapshots.reserve(stats.size()); + + for (auto & stat : stats) + { + auto const thread_id = stat.thread_id; + if (thread_id == current_thread_id) + continue; + auto current_time = time(nullptr); + auto previous_snapshot = last_sent_snapshots.find(thread_id); + auto increment = + previous_snapshot != last_sent_snapshots.end() + ? CountersIncrement(stat.counters, previous_snapshot->second) + : CountersIncrement(stat.counters); + snapshots.push_back(ProfileEventsSnapshot{ + thread_id, + std::move(increment), + stat.memory_usage, + current_time + }); + new_snapshots[thread_id] = std::move(stat.counters); + } + + group_snapshot.thread_id = 0; + group_snapshot.current_time = time(nullptr); + group_snapshot.memory_usage = thread_group->memory_tracker.get(); + auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); + auto prev_group_snapshot = last_sent_snapshots.find(0); + group_snapshot.counters = + prev_group_snapshot != last_sent_snapshots.end() + ? 
CountersIncrement(group_counters, prev_group_snapshot->second) + : CountersIncrement(group_counters); + new_snapshots[0] = std::move(group_counters); + } + last_sent_snapshots = std::move(new_snapshots); + + const String server_display_name = "localhost"; + for (auto & snapshot : snapshots) + { + dumpProfileEvents(snapshot, columns, server_display_name); + dumpMemoryTracker(snapshot, columns, server_display_name); + } + dumpProfileEvents(group_snapshot, columns, server_display_name); + dumpMemoryTracker(group_snapshot, columns, server_display_name); + + MutableColumns logs_columns; + Block curr_block; + size_t rows = 0; + + for (; state->profile_queue->tryPop(curr_block); ++rows) + { + auto curr_columns = curr_block.getColumns(); + for (size_t j = 0; j < curr_columns.size(); ++j) + columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); + } + +} + void LocalConnection::sendQuery( const ConnectionTimeouts &, const String & query, @@ -85,10 +171,15 @@ void LocalConnection::sendQuery( state->query_id = query_id; state->query = query; state->stage = QueryProcessingStage::Enum(stage); + state->profile_queue = std::make_shared(std::numeric_limits::max()); + CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); if (send_progress) state->after_send_progress.restart(); + if (send_profile_events) + state->after_send_profile_events.restart(); + next_packet_type.reset(); try @@ -231,6 +322,13 @@ bool LocalConnection::poll(size_t) return true; } + if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) + { + state->after_send_profile_events.restart(); + next_packet_type = Protocol::Server::ProfileEvents; + return true; + } + try { pollImpl(); diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index b85022cf183..9f0b6989c47 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -5,6 +5,7 @@ #include #include #include +#include #include @@ -29,6 +30,7 @@ struct LocalQueryState std::unique_ptr executor; std::unique_ptr pushing_executor; std::unique_ptr pushing_async_executor; + InternalProfileEventsQueuePtr profile_queue; std::optional exception; @@ -50,13 +52,15 @@ struct LocalQueryState Progress progress; /// Time after the last check to stop the request and send the progress. 
Stopwatch after_send_progress; + Stopwatch after_send_profile_events; + }; class LocalConnection : public IServerConnection, WithContext { public: - explicit LocalConnection(ContextPtr context_, bool send_progress_ = false); + explicit LocalConnection(ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false); ~LocalConnection() override; @@ -129,12 +133,15 @@ private: void updateProgress(const Progress & value); + void updateProfileEvents(Block & block); + bool pollImpl(); ContextMutablePtr query_context; Session session; bool send_progress; + bool send_profile_events; String description = "clickhouse-local"; std::optional state; @@ -144,5 +151,7 @@ private: std::optional next_packet_type; String current_database; + + ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; }; } diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 472efc109fb..263ee9acec6 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -1,5 +1,6 @@ #include "ProfileEventsExt.h" #include +#include #include #include #include @@ -45,4 +46,48 @@ void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, offsets.push_back(offsets.back() + size); } + +void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +{ + size_t rows = 0; + auto & name_column = columns[NAME_COLUMN_INDEX]; + auto & value_column = columns[VALUE_COLUMN_INDEX]; + for (ProfileEvents::Event event = 0; event < ProfileEvents::Counters::num_counters; ++event) + { + Int64 value = snapshot.counters[event]; + + if (value == 0) + continue; + + const char * desc = ProfileEvents::getName(event); + name_column->insertData(desc, strlen(desc)); + value_column->insert(value); + rows++; + } + + // Fill the rest of the columns with data + for (size_t row = 0; row < rows; ++row) + { + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(ProfileEvents::Type::INCREMENT); + } +} + +void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +{ + { + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(ProfileEvents::Type::GAUGE); + + columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); + columns[i++]->insert(snapshot.memory_usage); + } +} + } diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 8a92eadec79..0edaec64d25 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -7,9 +7,27 @@ namespace ProfileEvents { +constexpr size_t NAME_COLUMN_INDEX = 4; +constexpr size_t VALUE_COLUMN_INDEX = 5; + +struct ProfileEventsSnapshot +{ + UInt64 thread_id; + ProfileEvents::CountersIncrement counters; + Int64 memory_usage; + time_t current_time; +}; + +using ThreadIdToCountersSnapshot = std::unordered_map; + /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +/// Add records about provided non-zero ProfileEvents::Counters. 
+void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name); + +void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name); + /// This is for ProfileEvents packets. enum Type : int8_t { diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 99523ff09e3..1198cc9271f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -853,82 +853,12 @@ void TCPHandler::sendExtremes(const Block & extremes) } } - -namespace -{ - using namespace ProfileEvents; - - constexpr size_t NAME_COLUMN_INDEX = 4; - constexpr size_t VALUE_COLUMN_INDEX = 5; - - struct ProfileEventsSnapshot - { - UInt64 thread_id; - ProfileEvents::CountersIncrement counters; - Int64 memory_usage; - time_t current_time; - }; - - /* - * Add records about provided non-zero ProfileEvents::Counters. - */ - void dumpProfileEvents( - ProfileEventsSnapshot const & snapshot, - MutableColumns & columns, - String const & host_name) - { - size_t rows = 0; - auto & name_column = columns[NAME_COLUMN_INDEX]; - auto & value_column = columns[VALUE_COLUMN_INDEX]; - for (ProfileEvents::Event event = 0; event < ProfileEvents::Counters::num_counters; ++event) - { - Int64 value = snapshot.counters[event]; - - if (value == 0) - continue; - - const char * desc = ProfileEvents::getName(event); - name_column->insertData(desc, strlen(desc)); - value_column->insert(value); - rows++; - } - - // Fill the rest of the columns with data - for (size_t row = 0; row < rows; ++row) - { - size_t i = 0; - columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::INCREMENT); - } - } - - void dumpMemoryTracker( - ProfileEventsSnapshot const & snapshot, - MutableColumns & columns, - String const & host_name) - { - { - size_t i = 0; - columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::GAUGE); - - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); - } - } -} - - void TCPHandler::sendProfileEvents() { if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) return; - NamesAndTypesList column_names_and_types = { + static const NamesAndTypesList column_names_and_types = { { "host_name", std::make_shared() }, { "current_time", std::make_shared() }, { "thread_id", std::make_shared() }, @@ -943,6 +873,7 @@ void TCPHandler::sendProfileEvents() Block block(std::move(temp_columns)); + using namespace ProfileEvents; MutableColumns columns = block.mutateColumns(); auto thread_group = CurrentThread::getGroup(); auto const current_thread_id = CurrentThread::get().thread_id; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 6afda654e6a..b6ce9fa7507 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -3,9 +3,10 @@ #include #include -#include "Common/ProfileEvents.h" +#include #include #include +#include #include #include #include @@ -13,7 +14,7 @@ #include #include #include -#include +#include #include @@ -36,6 +37,8 @@ struct Settings; class ColumnsDescription; struct ProfileInfo; class TCPServer; +class NativeWriter; +class NativeReader; /// State of query 
processing. struct QueryState @@ -189,9 +192,7 @@ private: CurrentMetrics::Increment metric_increment{CurrentMetrics::TCPConnection}; - using ThreadIdToCountersSnapshot = std::unordered_map; - - ThreadIdToCountersSnapshot last_sent_snapshots; + ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; /// It is the name of the server that will be sent to the client. String server_display_name; From 24bd47e556f7258a2dbd92679f9b21ab8fb8282b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 15 Feb 2022 20:11:13 +0800 Subject: [PATCH 02/87] finish dev --- programs/local/LocalServer.cpp | 2 +- src/Client/ClientBase.h | 1 + src/Client/LocalConnection.cpp | 13 ++++++++----- src/Client/LocalConnection.h | 9 +++++++-- src/Client/Suggest.cpp | 1 + 5 files changed, 18 insertions(+), 8 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index db0015882b0..a090a007219 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -411,7 +411,7 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config()); - connection = LocalConnection::createConnection(connection_parameters, global_context, need_render_progress); + connection = LocalConnection::createConnection(connection_parameters, global_context, need_render_progress, need_render_profile_events); } diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index e74a6a47d76..08316168f98 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -212,6 +212,7 @@ protected: ProgressIndication progress_indication; bool need_render_progress = true; + bool need_render_profile_events = true; bool written_first_block = false; size_t processed_rows = 0; /// How many rows have been read or written. 
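With `need_render_profile_events` defaulting to true above, clickhouse-local now receives ProfileEvents packets over its LocalConnection much like a TCP client would. A minimal way to exercise that path is simply to run a query through clickhouse-local (a plain invocation for illustration, not something added by this patch; how the events are rendered depends on the client's progress-indication settings):

```bash
# Runs a query through LocalConnection; ProfileEvents packets are produced
# on each interactive_delay tick while the query executes.
clickhouse-local --query "SELECT sum(number) FROM numbers(100000000)"
```
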
diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 037111d4b1a..e9cf335bd76 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -62,7 +62,7 @@ void LocalConnection::updateProgress(const Progress & value) state->progress.incrementPiecewiseAtomically(value); } -void LocalConnection::updateProfileEvents(Block & block) +void LocalConnection::getProfileEvents(Block & block) { static const NamesAndTypesList column_names_and_types = { {"host_name", std::make_shared()}, @@ -141,7 +141,6 @@ void LocalConnection::updateProfileEvents(Block & block) for (size_t j = 0; j < curr_columns.size(); ++j) columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); } - } void LocalConnection::sendQuery( @@ -163,7 +162,8 @@ void LocalConnection::sendQuery( if (!current_database.empty()) query_context->setCurrentDatabase(current_database); - CurrentThread::QueryScope query_scope_holder(query_context); + query_scope_holder.reset(); + query_scope_holder = std::make_unique(query_context); state.reset(); state.emplace(); @@ -324,8 +324,11 @@ bool LocalConnection::poll(size_t) if (send_profile_events && (state->after_send_profile_events.elapsedMicroseconds() >= query_context->getSettingsRef().interactive_delay)) { + Block block; state->after_send_profile_events.restart(); next_packet_type = Protocol::Server::ProfileEvents; + getProfileEvents(block); + state->block.emplace(std::move(block)); return true; } @@ -557,9 +560,9 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const PartitionReadResponse throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } -ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress) +ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress, bool send_profile_events) { - return std::make_unique(current_context, send_progress); + return std::make_unique(current_context, send_progress, send_profile_events); } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 9f0b6989c47..7edb791a177 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -66,7 +66,11 @@ public: IServerConnection::Type getConnectionType() const override { return IServerConnection::Type::LOCAL; } - static ServerConnectionPtr createConnection(const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false); + static ServerConnectionPtr createConnection( + const ConnectionParameters & connection_parameters, + ContextPtr current_context, + bool send_progress = false, + bool send_profile_events = false); void setDefaultDatabase(const String & database) override; @@ -133,7 +137,7 @@ private: void updateProgress(const Progress & value); - void updateProfileEvents(Block & block); + void getProfileEvents(Block & block); bool pollImpl(); @@ -153,5 +157,6 @@ private: String current_database; ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; + std::unique_ptr query_scope_holder; }; } diff --git a/src/Client/Suggest.cpp b/src/Client/Suggest.cpp index 738c98d2119..b711008e233 100644 --- a/src/Client/Suggest.cpp +++ b/src/Client/Suggest.cpp @@ -103,6 +103,7 @@ void Suggest::load(ContextPtr context, const ConnectionParameters & connection_p { loading_thread = std::thread([context=Context::createCopy(context), connection_parameters, suggestion_limit, this] { + ThreadStatus thread_status; for 
(size_t retry = 0; retry < 10; ++retry) { try From e6b29167da0e9d51d7956cc1e2825d462ccd224b Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 15 Feb 2022 20:26:53 +0800 Subject: [PATCH 03/87] fix style --- src/Client/LocalConnection.cpp | 5 +++-- src/Interpreters/ProfileEventsExt.cpp | 10 +++++----- src/Interpreters/ProfileEventsExt.h | 4 ++-- src/Server/TCPHandler.cpp | 5 +++-- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index e9cf335bd76..08ba485d5fc 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -64,11 +64,13 @@ void LocalConnection::updateProgress(const Progress & value) void LocalConnection::getProfileEvents(Block & block) { + using namespace ProfileEvents; + static const NamesAndTypesList column_names_and_types = { {"host_name", std::make_shared()}, {"current_time", std::make_shared()}, {"thread_id", std::make_shared()}, - {"type", ProfileEvents::TypeEnum}, + {"type", TypeEnum}, {"name", std::make_shared()}, {"value", std::make_shared()}, }; @@ -77,7 +79,6 @@ void LocalConnection::getProfileEvents(Block & block) for (auto const & name_and_type : column_names_and_types) temp_columns.emplace_back(name_and_type.type, name_and_type.name); - using namespace ProfileEvents; block = Block(std::move(temp_columns)); MutableColumns columns = block.mutateColumns(); auto thread_group = CurrentThread::getGroup(); diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 263ee9acec6..173df507c65 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -37,7 +37,7 @@ void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, if (nonzero_only && 0 == value) continue; - const char * desc = ProfileEvents::getName(event); + const char * desc = getName(event); key_column.insertData(desc, strlen(desc)); value_column.insert(value); size++; @@ -52,14 +52,14 @@ void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumn size_t rows = 0; auto & name_column = columns[NAME_COLUMN_INDEX]; auto & value_column = columns[VALUE_COLUMN_INDEX]; - for (ProfileEvents::Event event = 0; event < ProfileEvents::Counters::num_counters; ++event) + for (Event event = 0; event < Counters::num_counters; ++event) { Int64 value = snapshot.counters[event]; if (value == 0) continue; - const char * desc = ProfileEvents::getName(event); + const char * desc = getName(event); name_column->insertData(desc, strlen(desc)); value_column->insert(value); rows++; @@ -72,7 +72,7 @@ void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumn columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::INCREMENT); + columns[i++]->insert(Type::INCREMENT); } } @@ -83,7 +83,7 @@ void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumn columns[i++]->insertData(host_name.data(), host_name.size()); columns[i++]->insert(UInt64(snapshot.current_time)); columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::GAUGE); + columns[i++]->insert(Type::GAUGE); columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); columns[i++]->insert(snapshot.memory_usage); diff --git a/src/Interpreters/ProfileEventsExt.h 
b/src/Interpreters/ProfileEventsExt.h index 0edaec64d25..ebb6981405f 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -13,12 +13,12 @@ constexpr size_t VALUE_COLUMN_INDEX = 5; struct ProfileEventsSnapshot { UInt64 thread_id; - ProfileEvents::CountersIncrement counters; + CountersIncrement counters; Int64 memory_usage; time_t current_time; }; -using ThreadIdToCountersSnapshot = std::unordered_map; +using ThreadIdToCountersSnapshot = std::unordered_map; /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1198cc9271f..2f6882643ba 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -855,6 +855,8 @@ void TCPHandler::sendExtremes(const Block & extremes) void TCPHandler::sendProfileEvents() { + using namespace ProfileEvents; + if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) return; @@ -862,7 +864,7 @@ void TCPHandler::sendProfileEvents() { "host_name", std::make_shared() }, { "current_time", std::make_shared() }, { "thread_id", std::make_shared() }, - { "type", ProfileEvents::TypeEnum }, + { "type", TypeEnum }, { "name", std::make_shared() }, { "value", std::make_shared() }, }; @@ -873,7 +875,6 @@ void TCPHandler::sendProfileEvents() Block block(std::move(temp_columns)); - using namespace ProfileEvents; MutableColumns columns = block.mutateColumns(); auto thread_group = CurrentThread::getGroup(); auto const current_thread_id = CurrentThread::get().thread_id; From dd37b237ab4f949e27d5bdbfb9a4bc51bc5e801d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 18 Jan 2022 12:41:33 +0100 Subject: [PATCH 04/87] Changing repository url to packages.clickhouse.com --- docker/server/Dockerfile | 4 ++-- docs/_includes/install/deb.sh | 4 ++-- docs/_includes/install/rpm.sh | 7 +++---- docs/_includes/install/tgz.sh | 27 +++++++++++++------------- docs/en/getting-started/install.md | 29 +++++----------------------- docs/ja/getting-started/install.md | 29 +++++----------------------- docs/ru/getting-started/install.md | 31 +++++------------------------- docs/zh/getting-started/install.md | 29 +++++----------------------- 8 files changed, 41 insertions(+), 119 deletions(-) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5b10d1fc490..5b7990ab030 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" +ARG repository="deb https://packages.clickhouse.com/deb stable main" ARG version=22.1.1.* # set non-empty deb_location_url url to create a docker image @@ -58,7 +58,7 @@ RUN groupadd -r clickhouse --gid=101 \ wget \ tzdata \ && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ && if [ -n "$deb_location_url" ]; then \ echo "installing from custom url with deb packages: $deb_location_url" \ diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh index 21106e9fc47..9dceef4c245 100644 --- a/docs/_includes/install/deb.sh +++ b/docs/_includes/install/deb.sh @@ 
-1,7 +1,7 @@ sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ +echo "deb https://packages.clickhouse.com/deb stable main/" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update diff --git a/docs/_includes/install/rpm.sh b/docs/_includes/install/rpm.sh index e3fd1232047..ff99018f872 100644 --- a/docs/_includes/install/rpm.sh +++ b/docs/_includes/install/rpm.sh @@ -1,7 +1,6 @@ -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client +sudo yum install -y yum-utils +sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo +sudo yum install -y clickhouse-server clickhouse-client sudo /etc/init.d/clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/tgz.sh b/docs/_includes/install/tgz.sh index 0994510755b..4ba5890b32b 100644 --- a/docs/_includes/install/tgz.sh +++ b/docs/_includes/install/tgz.sh @@ -1,19 +1,20 @@ -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ +LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz +export LATEST_VERSION +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index c03daf45b02..da6e225f745 100644 --- a/docs/en/getting-started/install.md +++ 
b/docs/en/getting-started/install.md @@ -29,7 +29,7 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. -You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/). +You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable). #### Packages {#packages} @@ -49,9 +49,7 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat First, you need to add the official repository: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available. @@ -62,34 +60,17 @@ Then run these commands to install packages: sudo yum install clickhouse-server clickhouse-client ``` -You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64). +You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable). ### From Tgz Archives {#from-tgz-archives} It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. -The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/. +The required version can be downloaded with `curl` or `wget` from repository https://packages.clickhouse.com/tgz/. After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` For production environments, it’s recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. 
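Not part of the documentation change itself, but a quick sanity check that a Debian/Ubuntu host resolves ClickHouse packages from the new location after the sources list above is updated (standard apt commands; exact output varies by apt version):

```bash
sudo apt-get update
# The candidate version should now be listed as coming from packages.clickhouse.com
apt-cache policy clickhouse-server
```
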
diff --git a/docs/ja/getting-started/install.md b/docs/ja/getting-started/install.md index 7a2a822fe52..575506c3c4b 100644 --- a/docs/ja/getting-started/install.md +++ b/docs/ja/getting-started/install.md @@ -30,7 +30,7 @@ Debian や Ubuntu 用にコンパイル済みの公式パッケージ `deb` を 最新版を使いたい場合は、`stable`を`testing`に置き換えてください。(テスト環境ではこれを推奨します) -同様に、[こちら](https://repo.clickhouse.com/deb/stable/main/)からパッケージをダウンロードして、手動でインストールすることもできます。 +同様に、[こちら](https://packages.clickhouse.com/deb/pool/stable)からパッケージをダウンロードして、手動でインストールすることもできます。 #### パッケージ {#packages} @@ -46,9 +46,7 @@ CentOS、RedHat、その他すべてのrpmベースのLinuxディストリビュ まず、公式リポジトリを追加する必要があります: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` 最新版を使いたい場合は `stable` を `testing` に置き換えてください。(テスト環境ではこれが推奨されています)。`prestable` もしばしば同様に利用できます。 @@ -59,33 +57,16 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -同様に、[こちら](https://repo.clickhouse.com/rpm/stable/x86_64) からパッケージをダウンロードして、手動でインストールすることもできます。 +同様に、[こちら](https://packages.clickhouse.com/rpm/stable) からパッケージをダウンロードして、手動でインストールすることもできます。 ### Tgzアーカイブから {#from-tgz-archives} すべての Linux ディストリビューションで、`deb` や `rpm` パッケージがインストールできない場合は、公式のコンパイル済み `tgz` アーカイブを使用することをお勧めします。 -必要なバージョンは、リポジトリ https://repo.clickhouse.com/tgz/ から `curl` または `wget` でダウンロードできます。その後、ダウンロードしたアーカイブを解凍し、インストールスクリプトでインストールしてください。最新版の例は以下です: +必要なバージョンは、リポジトリ https://packages.clickhouse.com/tgz/ から `curl` または `wget` でダウンロードできます。その後、ダウンロードしたアーカイブを解凍し、インストールスクリプトでインストールしてください。最新版の例は以下です: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` 本番環境では、最新の `stable` バージョンを使うことをお勧めします。GitHub のページ https://github.com/ClickHouse/ClickHouse/tags で 接尾辞 `-stable` となっているバージョン番号として確認できます。 diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index a12773a75b0..84f9bc576e9 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -27,11 +27,9 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su {% include 'install/deb.sh' %} ``` -Также эти пакеты можно скачать и установить вручную отсюда: https://repo.clickhouse.com/deb/stable/main/. - Чтобы использовать различные [версии ClickHouse](../faq/operations/production.md) в зависимости от ваших потребностей, вы можете заменить `stable` на `lts` или `testing`. 
-Также вы можете вручную скачать и установить пакеты из [репозитория](https://repo.clickhouse.com/deb/stable/main/). +Также вы можете вручную скачать и установить пакеты из [репозитория](https://packages.clickhouse.com/deb/pool/stable). #### Пакеты {#packages} @@ -51,9 +49,7 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su Сначала нужно подключить официальный репозиторий: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`. @@ -64,34 +60,17 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.clickhouse.com/rpm/stable/x86_64. +Также есть возможность установить пакеты вручную, скачав отсюда: https://packages.clickhouse.com/rpm/stable. ### Из Tgz архивов {#from-tgz-archives} Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz` архивов для всех дистрибутивов, где невозможна установка `deb` и `rpm` пакетов. -Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.clickhouse.com/tgz/. +Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. Пример установки самой свежей версии: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` Для production окружений рекомендуется использовать последнюю `stable`-версию. Её номер также можно найти на github с на вкладке https://github.com/ClickHouse/ClickHouse/tags c постфиксом `-stable`. 
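For the RPM side, the repository ids (clickhouse-stable, clickhouse-lts, clickhouse-prestable, clickhouse-testing) are defined in the packages/clickhouse-rpm.repo file added in a later patch of this series, where only clickhouse-testing ships with enabled=1. A hedged example of enabling and checking the stable repo after adding the repo file (yum-utils commands, not part of this patch):

```bash
sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
# Per the repo file, stable/lts/prestable default to enabled=0, so switch on what you need:
sudo yum-config-manager --enable clickhouse-stable
yum repolist enabled | grep -i clickhouse
```
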
diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index eec3aabe2a1..6a966355fae 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -29,7 +29,7 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。 -你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/deb/stable/main/)。 +你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/deb/pool/stable)。 安装包列表: @@ -45,9 +45,7 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not 首先,您需要添加官方存储库: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。 @@ -58,35 +56,18 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/rpm/stable/x86_64)。 +你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/rpm/stable)。 ### `Tgz`安装包 {#from-tgz-archives} 如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。 -所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.com/tgz/`下载。 +所需的版本可以通过`curl`或`wget`从存储库`https://packages.clickhouse.com/tgz/`下载。 下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新稳定版本的安装示例: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` 对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。 From 01bd5858c7f9cfce8daf39a3a635acda6dee7e4a Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 23 Feb 2022 15:36:37 +0100 Subject: [PATCH 05/87] Add clickhouse-rpm.repo to repository --- packages/clickhouse-rpm.repo | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 packages/clickhouse-rpm.repo diff --git a/packages/clickhouse-rpm.repo b/packages/clickhouse-rpm.repo new file mode 100644 index 00000000000..27321123dc1 --- /dev/null +++ b/packages/clickhouse-rpm.repo @@ -0,0 +1,31 @@ +[clickhouse-stable] +name=ClickHouse - Stable Repository +baseurl=https://packages.clickhouse.com/rpm/stable/ +gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-lts] +name=ClickHouse - LTS Repository +baseurl=https://packages.clickhouse.com/rpm/lts/ +gpgkey=https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-prestable] +name=ClickHouse - Pre-stable Repository +baseurl=https://packages.clickhouse.com/rpm/prestable/ +gpgkey=https://packages.clickhouse.com/rpm/prestable/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-testing] +name=ClickHouse - Testing Repository +baseurl=https://packages.clickhouse.com/rpm/testing/ +gpgkey=https://packages.clickhouse.com/rpm/testing/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=1 From eaf6d8c5f60b2f514b41c3cc4496d7bac1d6714a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 24 Feb 2022 17:35:29 +0100 Subject: [PATCH 06/87] Update DiskS3.cpp --- src/Disks/S3/DiskS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index aff4985a4f1..5d61285981b 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -286,7 +286,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size /*, std::move(schedule) */); + buf_size, std::move(schedule)); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { From 6c8401bfbd4ae820c7e91c3e53bfc1a31fe39925 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Feb 2022 16:35:37 +0100 Subject: [PATCH 07/87] Fix --- src/Storages/FileLog/StorageFileLog.cpp | 36 ++++++++++++------------- src/Storages/FileLog/StorageFileLog.h | 5 ++-- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index dac70e362ed..4a9da5cbf87 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -53,6 +53,7 @@ StorageFileLog::StorageFileLog( ContextPtr context_, const ColumnsDescription & columns_, const String & path_, + const String & metadata_base_path_, const String & format_name_, std::unique_ptr settings, const String & comment, @@ -61,6 +62,7 @@ StorageFileLog::StorageFileLog( , WithContext(context_->getGlobalContext()) , filelog_settings(std::move(settings)) , path(path_) + , metadata_base_path(std::filesystem::path(metadata_base_path_) / "metadata") , format_name(format_name_) , log(&Poco::Logger::get("StorageFileLog (" + table_id_.table_name + ")")) , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) @@ -94,18 +96,13 @@ StorageFileLog::StorageFileLog( void StorageFileLog::loadMetaFiles(bool attach) { - const auto & storage = getStorageID(); - /// FIXME Why do we need separate 
directory? Why not to use data directory? - root_meta_path - = std::filesystem::path(getContext()->getPath()) / "stream_engines/filelog/" / DatabaseCatalog::getPathForUUID(storage.uuid); - /// Attach table if (attach) { /// Meta file may lost, log and create directory - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - /// Create root_meta_path directory when store meta data + /// Create metadata_base_path directory when store meta data LOG_ERROR(log, "Metadata files of table {} are lost.", getStorageID().getTableName()); } /// Load all meta info to file_infos; @@ -114,14 +111,14 @@ void StorageFileLog::loadMetaFiles(bool attach) /// Create table, just create meta data directory else { - if (std::filesystem::exists(root_meta_path)) + if (std::filesystem::exists(metadata_base_path)) { throw Exception( ErrorCodes::TABLE_METADATA_ALREADY_EXISTS, "Metadata files already exist by path: {}, remove them manually if it is intended", - root_meta_path); + metadata_base_path); } - /// We do not create the root_meta_path directory at creation time, create it at the moment of serializing + /// We do not create the metadata_base_path directory at creation time, create it at the moment of serializing /// meta files, such that can avoid unnecessarily create this directory if create table failed. } } @@ -212,9 +209,9 @@ void StorageFileLog::loadFiles() void StorageFileLog::serialize() const { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - std::filesystem::create_directories(root_meta_path); + std::filesystem::create_directories(metadata_base_path); } for (const auto & [inode, meta] : file_infos.meta_by_inode) { @@ -236,9 +233,9 @@ void StorageFileLog::serialize() const void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - std::filesystem::create_directories(root_meta_path); + std::filesystem::create_directories(metadata_base_path); } auto full_name = getFullMetaPath(file_meta.file_name); if (!std::filesystem::exists(full_name)) @@ -257,11 +254,11 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const void StorageFileLog::deserialize() { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) return; /// In case of single file (not a watched directory), /// iterated directory always has one file inside. - for (const auto & dir_entry : std::filesystem::directory_iterator{root_meta_path}) + for (const auto & dir_entry : std::filesystem::directory_iterator{metadata_base_path}) { if (!dir_entry.is_regular_file()) { @@ -269,7 +266,7 @@ void StorageFileLog::deserialize() ErrorCodes::BAD_FILE_TYPE, "The file {} under {} is not a regular file when deserializing meta files", dir_entry.path().c_str(), - root_meta_path); + metadata_base_path); } ReadBufferFromFile in(dir_entry.path().c_str()); @@ -373,8 +370,8 @@ void StorageFileLog::drop() { try { - if (std::filesystem::exists(root_meta_path)) - std::filesystem::remove_all(root_meta_path); + if (std::filesystem::exists(metadata_base_path)) + std::filesystem::remove_all(metadata_base_path); } catch (...) 
{ @@ -802,6 +799,7 @@ void registerStorageFileLog(StorageFactory & factory) args.getContext(), args.columns, path, + args.relative_data_path, format, std::move(filelog_settings), args.comment, diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index f7e67747965..1d1ae30f58b 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -89,7 +89,7 @@ public: auto & getFileInfos() { return file_infos; } - String getFullMetaPath(const String & file_name) const { return std::filesystem::path(root_meta_path) / file_name; } + String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; } String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; } NamesAndTypesList getVirtuals() const override; @@ -131,6 +131,7 @@ protected: ContextPtr context_, const ColumnsDescription & columns_, const String & path_, + const String & metadata_base_path_, const String & format_name_, std::unique_ptr settings, const String & comment, @@ -145,7 +146,7 @@ private: /// If path argument of the table is a regular file, it equals to user_files_path /// otherwise, it equals to user_files_path/ + path_argument/, e.g. path String root_data_path; - String root_meta_path; + String metadata_base_path; FileInfos file_infos; From 2ba9010a347c721f7d90ea8b0d0819a04e848c23 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 25 Feb 2022 17:53:19 +0100 Subject: [PATCH 08/87] Fix --- .../FileLog/ReadBufferFromFileLog.cpp | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 49 +++++++++++++------ src/Storages/FileLog/StorageFileLog.h | 3 +- 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/src/Storages/FileLog/ReadBufferFromFileLog.cpp b/src/Storages/FileLog/ReadBufferFromFileLog.cpp index 86caac57a53..ebb0925f128 100644 --- a/src/Storages/FileLog/ReadBufferFromFileLog.cpp +++ b/src/Storages/FileLog/ReadBufferFromFileLog.cpp @@ -134,7 +134,7 @@ void ReadBufferFromFileLog::readNewRecords(ReadBufferFromFileLog::Records & new_ UInt64 current_position = reader.tellg(); StorageFileLog::assertStreamGood(reader); - file_meta.last_writen_position = current_position; + file_meta.last_written_position = current_position; /// stream reach to end if (current_position == file_meta.last_open_end) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 4a9da5cbf87..604116808ab 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -222,12 +222,12 @@ void StorageFileLog::serialize() const } else { - checkOffsetIsValid(full_name, meta.last_writen_position); + checkOffsetIsValid(full_name, meta.last_written_position); } WriteBufferFromFile out(full_name); writeIntText(inode, out); writeChar('\n', out); - writeIntText(meta.last_writen_position, out); + writeIntText(meta.last_written_position, out); } } @@ -244,12 +244,12 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const } else { - checkOffsetIsValid(full_name, file_meta.last_writen_position); + checkOffsetIsValid(full_name, file_meta.last_written_position); } WriteBufferFromFile out(full_name); writeIntText(inode, out); writeChar('\n', out); - writeIntText(file_meta.last_writen_position, out); + writeIntText(file_meta.last_written_position, out); } void StorageFileLog::deserialize() @@ -284,7 +284,7 @@ void StorageFileLog::deserialize() } meta.file_name = dir_entry.path().filename(); 
- meta.last_writen_position = last_written_pos; + meta.last_written_position = last_written_pos; file_infos.meta_by_inode.emplace(inode, meta); } @@ -435,7 +435,17 @@ void StorageFileLog::openFilesAndSetPos() auto & file_ctx = findInMap(file_infos.context_by_name, file); if (file_ctx.status != FileStatus::NO_CHANGE) { - file_ctx.reader.emplace(getFullDataPath(file)); + auto & meta = findInMap(file_infos.meta_by_inode, file_ctx.inode); + + auto current_path = getFullDataPath(file); + if (!std::filesystem::exists(file) && meta.last_written_position != 0) + { + file_ctx.status = FileStatus::REMOVED; + continue; + } + + file_ctx.reader.emplace(current_path); + auto & reader = file_ctx.reader.value(); assertStreamGood(reader); @@ -445,23 +455,24 @@ void StorageFileLog::openFilesAndSetPos() auto file_end = reader.tellg(); assertStreamGood(reader); - auto & meta = findInMap(file_infos.meta_by_inode, file_ctx.inode); - if (meta.last_writen_position > static_cast(file_end)) + if (meta.last_written_position > static_cast(file_end)) { throw Exception( ErrorCodes::CANNOT_READ_ALL_DATA, "Last saved offsset for File {} is bigger than file size ({} > {})", file, - meta.last_writen_position, + meta.last_written_position, file_end); } /// update file end at the moment, used in ReadBuffer and serialize meta.last_open_end = file_end; - reader.seekg(meta.last_writen_position); + reader.seekg(meta.last_written_position); assertStreamGood(reader); } } + + removeInvalidFiles(); serialize(); } @@ -927,6 +938,18 @@ bool StorageFileLog::updateFileInfos() } } } + + removeInvalidFiles(); + + /// These file infos should always have same size(one for one) + assert(file_infos.file_names.size() == file_infos.meta_by_inode.size()); + assert(file_infos.file_names.size() == file_infos.context_by_name.size()); + + return events.empty() || file_infos.file_names.empty(); +} + +void StorageFileLog::removeInvalidFiles() +{ std::vector valid_files; /// Remove file infos with REMOVE status @@ -956,12 +979,6 @@ bool StorageFileLog::updateFileInfos() } } file_infos.file_names.swap(valid_files); - - /// These file infos should always have same size(one for one) - assert(file_infos.file_names.size() == file_infos.meta_by_inode.size()); - assert(file_infos.file_names.size() == file_infos.context_by_name.size()); - - return events.empty() || file_infos.file_names.empty(); } NamesAndTypesList StorageFileLog::getVirtuals() const diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 1d1ae30f58b..2dd45cbee6d 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -72,7 +72,7 @@ public: struct FileMeta { String file_name; - UInt64 last_writen_position = 0; + UInt64 last_written_position = 0; UInt64 last_open_end = 0; }; @@ -205,6 +205,7 @@ private: void deserialize(); static void checkOffsetIsValid(const String & full_name, UInt64 offset); + void removeInvalidFiles(); }; } From 2176d74cd1292f1580157162d717f59afb415103 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 15:11:38 +0800 Subject: [PATCH 09/87] Use connection pool in HiveMetastoreClient 1. remove lock for hive metastore client access 2. 
auo reconnect when connection is broken --- src/Storages/Hive/HiveCommon.cpp | 134 ++++++++++++++++++------------- src/Storages/Hive/HiveCommon.h | 63 ++++++++++++--- 2 files changed, 130 insertions(+), 67 deletions(-) diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index aa19ff042e2..4000e5b8981 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -1,3 +1,4 @@ +#include #include #if USE_HIVE @@ -5,6 +6,7 @@ #include #include #include +#include namespace DB @@ -15,6 +17,8 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +const unsigned ThriftHiveMetastoreClientPool::max_connections = 16; + bool HiveMetastoreClient::shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions) { @@ -40,25 +44,42 @@ bool HiveMetastoreClient::shouldUpdateTableMetadata( return false; } +void HiveMetastoreClient::tryCallHiveClient(std::function func) +{ + int i = 0; + String err_msg; + for (; i < max_retry; ++i) + { + auto client = client_pool.get(get_client_timeout); + try + { + func(client); + } + catch (apache::thrift::transport::TTransportException & e) + { + client.expire(); + err_msg = e.what(); + continue; + } + break; + } + if (i >= max_retry) + throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", err_msg); +} + HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata(const String & db_name, const String & table_name) { LOG_TRACE(log, "Get table metadata for {}.{}", db_name, table_name); - std::lock_guard lock{mutex}; auto table = std::make_shared(); std::vector partitions; - try + auto client_call = [&](ThriftHiveMetastoreClientPool::Entry & client) { client->get_table(*table, db_name, table_name); - /// Query the latest partition info to check new change. 
client->get_partitions(partitions, db_name, table_name, -1); - } - catch (apache::thrift::transport::TTransportException & e) - { - setExpired(); - throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", String(e.what())); - } + }; + tryCallHiveClient(client_call); bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions); String cache_key = getCacheKey(db_name, table_name); @@ -103,23 +124,26 @@ HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata( return metadata; } +std::shared_ptr HiveMetastoreClient::getHiveTable(const String & db_name, const String & table_name) +{ + auto table = std::make_shared(); + auto client_call = [&](ThriftHiveMetastoreClientPool::Entry & client) + { + client->get_table(*table, db_name, table_name); + }; + tryCallHiveClient(client_call); + return table; +} + void HiveMetastoreClient::clearTableMetadata(const String & db_name, const String & table_name) { String cache_key = getCacheKey(db_name, table_name); - std::lock_guard lock{mutex}; HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key); if (metadata) table_metadata_cache.remove(cache_key); } -void HiveMetastoreClient::setClient(std::shared_ptr client_) -{ - std::lock_guard lock{mutex}; - client = client_; - clearExpired(); -} - bool HiveMetastoreClient::PartitionInfo::haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const { /// Parameters include keys:numRows,numFiles,rawDataSize,totalSize,transient_lastDdlTime @@ -192,53 +216,55 @@ HiveMetastoreClientFactory & HiveMetastoreClientFactory::instance() return factory; } +using namespace apache::thrift; +using namespace apache::thrift::protocol; +using namespace apache::thrift::transport; +using namespace Apache::Hadoop::Hive; + HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & name, ContextPtr context) { - using namespace apache::thrift; - using namespace apache::thrift::protocol; - using namespace apache::thrift::transport; - using namespace Apache::Hadoop::Hive; std::lock_guard lock(mutex); auto it = clients.find(name); - if (it == clients.end() || it->second->isExpired()) + if (it == clients.end()) { - /// Connect to hive metastore - Poco::URI hive_metastore_url(name); - const auto & host = hive_metastore_url.getHost(); - auto port = hive_metastore_url.getPort(); - - std::shared_ptr socket = std::make_shared(host, port); - socket->setKeepAlive(true); - socket->setConnTimeout(conn_timeout_ms); - socket->setRecvTimeout(recv_timeout_ms); - socket->setSendTimeout(send_timeout_ms); - std::shared_ptr transport(new TBufferedTransport(socket)); - std::shared_ptr protocol(new TBinaryProtocol(transport)); - std::shared_ptr thrift_client = std::make_shared(protocol); - try + auto builder = [name]() { - transport->open(); - } - catch (TException & tx) - { - throw Exception("connect to hive metastore:" + name + " failed." 
+ tx.what(), ErrorCodes::BAD_ARGUMENTS); - } - - if (it == clients.end()) - { - HiveMetastoreClientPtr client = std::make_shared(std::move(thrift_client), context); - clients[name] = client; - return client; - } - else - { - it->second->setClient(std::move(thrift_client)); - return it->second; - } + return createThriftHiveMetastoreClient(name); + }; + auto client = std::make_shared(builder, context->getGlobalContext()); + clients[name] = client; + return client; } return it->second; } +const int HiveMetastoreClientFactory::conn_timeout_ms = 10000; +const int HiveMetastoreClientFactory::recv_timeout_ms = 10000; +const int HiveMetastoreClientFactory::send_timeout_ms = 10000; +std::shared_ptr HiveMetastoreClientFactory::createThriftHiveMetastoreClient(const String &name) +{ + Poco::URI hive_metastore_url(name); + const auto & host = hive_metastore_url.getHost(); + auto port = hive_metastore_url.getPort(); + + std::shared_ptr socket = std::make_shared(host, port); + socket->setKeepAlive(true); + socket->setConnTimeout(conn_timeout_ms); + socket->setRecvTimeout(recv_timeout_ms); + socket->setSendTimeout(send_timeout_ms); + std::shared_ptr transport(new TBufferedTransport(socket)); + std::shared_ptr protocol(new TBinaryProtocol(transport)); + std::shared_ptr thrift_client = std::make_shared(protocol); + try + { + transport->open(); + } + catch (TException & tx) + { + throw Exception("connect to hive metastore:" + name + " failed." + tx.what(), ErrorCodes::BAD_ARGUMENTS); + } + return thrift_client; +} } #endif diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index e88e67b0257..ec3d0b052b4 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -1,5 +1,6 @@ #pragma once +#include #include #if USE_HIVE @@ -10,12 +11,40 @@ #include #include +#include #include namespace DB { +using ThriftHiveMetastoreClientBuilder = std::function()>; + +class ThriftHiveMetastoreClientPool : public PoolBase +{ +public: + using Object = Apache::Hadoop::Hive::ThriftHiveMetastoreClient; + using ObjectPtr = std::shared_ptr; + using Entry = PoolBase::Entry; + explicit ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) + : PoolBase(max_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")) + , builder(builder_) + { + + } + +protected: + ObjectPtr allocObject() override + { + return builder(); + } + +private: + ThriftHiveMetastoreClientBuilder builder; + + const static unsigned max_connections; + +}; class HiveMetastoreClient : public WithContext { public: @@ -26,7 +55,9 @@ public: UInt64 last_modify_time; /// In ms size_t size; - FileInfo() = default; + explicit FileInfo() = default; + FileInfo & operator = (const FileInfo &) = default; + FileInfo(const FileInfo &) = default; FileInfo(const String & path_, UInt64 last_modify_time_, size_t size_) : path(path_), last_modify_time(last_modify_time_), size(size_) { @@ -94,17 +125,18 @@ public: using HiveTableMetadataPtr = std::shared_ptr; - explicit HiveMetastoreClient(std::shared_ptr client_, ContextPtr context_) - : WithContext(context_), client(client_), table_metadata_cache(1000) + explicit HiveMetastoreClient(ThriftHiveMetastoreClientBuilder builder_, ContextPtr context_) + : WithContext(context_) + , table_metadata_cache(1000) + , client_pool(builder_) { } + HiveTableMetadataPtr getTableMetadata(const String & db_name, const String & table_name); + // Access hive table information by hive client + std::shared_ptr getHiveTable(const String & db_name, const String & table_name); void 
clearTableMetadata(const String & db_name, const String & table_name); - void setClient(std::shared_ptr client_); - bool isExpired() const { return expired; } - void setExpired() { expired = true; } - void clearExpired() { expired = false; } private: static String getCacheKey(const String & db_name, const String & table_name) { return db_name + "." + table_name; } @@ -112,12 +144,15 @@ private: bool shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions); - std::shared_ptr client; + void tryCallHiveClient(std::function func); + LRUCache table_metadata_cache; - mutable std::mutex mutex; - std::atomic expired{false}; + ThriftHiveMetastoreClientPool client_pool; Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); + + const int max_retry = 3; + const UInt64 get_client_timeout = 1000000; }; using HiveMetastoreClientPtr = std::shared_ptr; @@ -128,13 +163,15 @@ public: HiveMetastoreClientPtr getOrCreate(const String & name, ContextPtr context); + static std::shared_ptr createThriftHiveMetastoreClient(const String & name); + private: std::mutex mutex; std::map clients; - const int conn_timeout_ms = 10000; - const int recv_timeout_ms = 10000; - const int send_timeout_ms = 10000; + const static int conn_timeout_ms; + const static int recv_timeout_ms; + const static int send_timeout_ms; }; } From c5e02be44ede2d96466981f07fc54bcddf150f60 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 15:22:54 +0800 Subject: [PATCH 10/87] fixed code-style --- src/Storages/Hive/HiveCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index 4000e5b8981..80110a517b9 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -47,7 +47,7 @@ bool HiveMetastoreClient::shouldUpdateTableMetadata( void HiveMetastoreClient::tryCallHiveClient(std::function func) { int i = 0; - String err_msg; + String err_msg; for (; i < max_retry; ++i) { auto client = client_pool.get(get_client_timeout); From 5885cfd86953487d1f7764ff87ac9c63c03f9b5c Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 16:47:50 +0800 Subject: [PATCH 11/87] fixed bug : unexpected result when using in clause for filtering partitions --- .../integration/hive_server/prepare_hive_data.sh | 5 ++++- .../runner/compose/docker_compose_hive.yml | 2 +- src/Storages/Hive/StorageHive.h | 2 +- tests/integration/test_hive_query/test.py | 12 +++++++++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index afecbb91c5d..fa67f5dbb77 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -2,5 +2,8 @@ hive -e "create database test" hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " +hive -e "create table test.parquet_demo(id 
string, score int) PARTITIONED BY(day string, hour) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " - hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" diff --git a/docker/test/integration/runner/compose/docker_compose_hive.yml b/docker/test/integration/runner/compose/docker_compose_hive.yml index 44f23655d2a..459e8481d0b 100644 --- a/docker/test/integration/runner/compose/docker_compose_hive.yml +++ b/docker/test/integration/runner/compose/docker_compose_hive.yml @@ -1,7 +1,7 @@ version: '2.3' services: hdfs1: - image: lgboustc/hive_test:v1.0 + image: lgboustc/hive_test:v2.0 hostname: hivetest restart: always entrypoint: bash /start.sh diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 9629629e057..1377e4a4504 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -36,7 +36,7 @@ public: ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const override { - return false; + return true; } diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index a68ae0b066d..3e89bc418d8 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -30,7 +30,8 @@ def test_create_parquet_table(started_cluster): node.query("set input_format_parquet_allow_missing_columns = true") result = node.query(""" DROP TABLE IF EXISTS default.demo_parquet; - CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) + CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day); + CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); """) logging.info("create result {}".format(result)) time.sleep(120) @@ -70,6 +71,15 @@ def test_parquet_groupby(started_cluster): 2021-11-16 2 """ assert result == expected_result + +def test_parquet_in_filter(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT day, count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') + """) + expected_result = """2021-11-05 2""" + 
assert result == expected_result def test_orc_groupby(started_cluster): logging.info('Start testing groupby ...') node = started_cluster.instances['h0_0_0'] From 6473767c9972faa481fafa716fba7326e3e25622 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 17:10:56 +0800 Subject: [PATCH 12/87] fixed code style --- src/Storages/Hive/HiveCommon.cpp | 27 +++++++++++++++++---------- src/Storages/Hive/HiveCommon.h | 17 +---------------- 2 files changed, 18 insertions(+), 26 deletions(-) diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index 80110a517b9..ac4abfb0f3d 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -17,7 +17,17 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -const unsigned ThriftHiveMetastoreClientPool::max_connections = 16; +static const unsigned max_hive_metastore_client_connections = 16; +static const int max_hive_metastore_client_retry = 3; +static const UInt64 get_hive_metastore_client_timeout = 1000000; +static const int hive_metastore_client_conn_timeout_ms = 10000; +static const int hive_metastore_client_recv_timeout_ms = 10000; +static const int hive_metastore_client_send_timeout_ms = 10000; + +ThriftHiveMetastoreClientPool::ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) + : PoolBase(max_hive_metastore_client_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")), builder(builder_) +{ +} bool HiveMetastoreClient::shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions) @@ -48,9 +58,9 @@ void HiveMetastoreClient::tryCallHiveClient(std::function= max_retry) + if (i >= max_hive_metastore_client_retry) throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", err_msg); } @@ -238,9 +248,6 @@ HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & na } return it->second; } -const int HiveMetastoreClientFactory::conn_timeout_ms = 10000; -const int HiveMetastoreClientFactory::recv_timeout_ms = 10000; -const int HiveMetastoreClientFactory::send_timeout_ms = 10000; std::shared_ptr HiveMetastoreClientFactory::createThriftHiveMetastoreClient(const String &name) { Poco::URI hive_metastore_url(name); @@ -249,9 +256,9 @@ std::shared_ptr HiveMetastoreClientFactory::createThr std::shared_ptr socket = std::make_shared(host, port); socket->setKeepAlive(true); - socket->setConnTimeout(conn_timeout_ms); - socket->setRecvTimeout(recv_timeout_ms); - socket->setSendTimeout(send_timeout_ms); + socket->setConnTimeout(hive_metastore_client_conn_timeout_ms); + socket->setRecvTimeout(hive_metastore_client_recv_timeout_ms); + socket->setSendTimeout(hive_metastore_client_send_timeout_ms); std::shared_ptr transport(new TBufferedTransport(socket)); std::shared_ptr protocol(new TBinaryProtocol(transport)); std::shared_ptr thrift_client = std::make_shared(protocol); diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index ec3d0b052b4..b8075457a02 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -26,12 +26,7 @@ public: using Object = Apache::Hadoop::Hive::ThriftHiveMetastoreClient; using ObjectPtr = std::shared_ptr; using Entry = PoolBase::Entry; - explicit ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) - : PoolBase(max_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")) - , builder(builder_) - { - - } + explicit ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder 
builder_); protected: ObjectPtr allocObject() override @@ -41,9 +36,6 @@ protected: private: ThriftHiveMetastoreClientBuilder builder; - - const static unsigned max_connections; - }; class HiveMetastoreClient : public WithContext { @@ -150,9 +142,6 @@ private: ThriftHiveMetastoreClientPool client_pool; Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); - - const int max_retry = 3; - const UInt64 get_client_timeout = 1000000; }; using HiveMetastoreClientPtr = std::shared_ptr; @@ -168,10 +157,6 @@ public: private: std::mutex mutex; std::map clients; - - const static int conn_timeout_ms; - const static int recv_timeout_ms; - const static int send_timeout_ms; }; } From afad4bf4144f35a5773d939205c3b419eef57153 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 17:14:56 +0800 Subject: [PATCH 13/87] update test --- tests/integration/test_hive_query/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 3e89bc418d8..3be42f30473 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -76,7 +76,7 @@ def test_parquet_in_filter(started_cluster): logging.info('Start testing groupby ...') node = started_cluster.instances['h0_0_0'] result = node.query(""" - SELECT day, count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') + SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') """) expected_result = """2021-11-05 2""" assert result == expected_result From 53190ed4d43b939277ae71ef6a2bfd8b00831251 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 17:40:08 +0800 Subject: [PATCH 14/87] updat test --- tests/integration/test_hive_query/test.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 3be42f30473..5f7f5aa61ca 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -31,6 +31,17 @@ def test_create_parquet_table(started_cluster): result = node.query(""" DROP TABLE IF EXISTS default.demo_parquet; CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day); + """) + logging.info("create result {}".format(result)) + time.sleep(120) + assert result.strip() == '' + +def test_create_parquet_table_1(started_cluster): + logging.info('Start testing creating hive table ...') + node = started_cluster.instances['h0_0_0'] + node.query("set input_format_parquet_allow_missing_columns = true") + result = node.query(""" + DROP TABLE IF EXISTS default.demo_parquet_parts; CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); """) logging.info("create result {}".format(result)) @@ -78,7 +89,7 @@ def test_parquet_in_filter(started_cluster): result = node.query(""" SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') """) - expected_result = """2021-11-05 2""" + expected_result = """2""" assert result == expected_result def test_orc_groupby(started_cluster): logging.info('Start testing groupby ...') From aa8db563d684a9595d20422a029201996bf95469 Mon Sep 17 00:00:00 2001 From: 
lgbo-ustc Date: Mon, 28 Feb 2022 18:01:47 +0800 Subject: [PATCH 15/87] update test --- docker/test/integration/hive_server/Dockerfile | 1 + docker/test/integration/hive_server/prepare_hive_data.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index fa6e4bf6313..4056f8aee45 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -38,6 +38,7 @@ COPY hadoop-env.sh /hadoop-3.1.0/etc/hadoop/ #COPY core-site.xml /hadoop-3.1.0/etc/hadoop COPY core-site.xml.template /hadoop-3.1.0/etc/hadoop COPY hive-site.xml /apache-hive-2.3.9-bin/conf +RUN echo "123" COPY prepare_hive_data.sh / COPY demo_data.txt / diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index fa67f5dbb77..b5ba2a08194 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -2,7 +2,7 @@ hive -e "create database test" hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " -hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" +hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" From b8a2deba0f95847d40a4a8e223597baa3432b7e5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 20:11:59 +0800 Subject: [PATCH 16/87] update tests --- docker/test/integration/hive_server/prepare_hive_data.sh | 1 + tests/integration/test_hive_query/test.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index b5ba2a08194..8126b975612 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ 
b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -5,5 +5,6 @@ hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" + hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 5f7f5aa61ca..621614813da 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -89,7 +89,9 @@ def test_parquet_in_filter(started_cluster): result = node.query(""" SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') """) - expected_result = """2""" + expected_result = """2 +""" + logging.info("query result:{}".format(result)) assert result == expected_result def test_orc_groupby(started_cluster): logging.info('Start testing groupby ...') @@ -150,4 +152,5 @@ def test_cache_read_bytes(started_cluster): assert result == expected_result result = node.query("select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0") logging.info("Read bytes from cache:{}".format(result)) + assert result.strip() != '0' From 24caa70243c237ccbc88555b6f4d964ccf7c0e9f Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 28 Feb 2022 20:23:00 +0800 Subject: [PATCH 17/87] remove meaningless codes --- docker/test/integration/hive_server/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index 4056f8aee45..fa6e4bf6313 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -38,7 +38,6 @@ COPY hadoop-env.sh /hadoop-3.1.0/etc/hadoop/ #COPY core-site.xml /hadoop-3.1.0/etc/hadoop COPY core-site.xml.template /hadoop-3.1.0/etc/hadoop COPY hive-site.xml /apache-hive-2.3.9-bin/conf -RUN echo "123" COPY prepare_hive_data.sh / COPY demo_data.txt / From f83132bad29fb44ad0ab0bb79664f44cb6e6fb85 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 1 Mar 2022 15:54:23 +0800 Subject: [PATCH 18/87] finish dev --- src/Client/ClientBase.cpp | 2 +- src/Client/LocalConnection.cpp | 80 +---------------------- src/Interpreters/ProfileEventsExt.cpp | 94 ++++++++++++++++++++++++++- src/Interpreters/ProfileEventsExt.h | 10 +-- src/Server/TCPHandler.cpp | 87 +------------------------ 5 files changed, 102 insertions(+), 171 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 
7dfa60ad560..b93fef04be4 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -866,7 +866,7 @@ void ClientBase::onProfileEvents(Block & block) if (rows == 0) return; - if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) + if (getName() == "local" || server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) { const auto & array_thread_id = typeid_cast(*block.getByName("thread_id").column).getData(); const auto & names = typeid_cast(*block.getByName("name").column); diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 08ba485d5fc..02b437d7ce6 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB @@ -64,84 +63,7 @@ void LocalConnection::updateProgress(const Progress & value) void LocalConnection::getProfileEvents(Block & block) { - using namespace ProfileEvents; - - static const NamesAndTypesList column_names_and_types = { - {"host_name", std::make_shared()}, - {"current_time", std::make_shared()}, - {"thread_id", std::make_shared()}, - {"type", TypeEnum}, - {"name", std::make_shared()}, - {"value", std::make_shared()}, - }; - - ColumnsWithTypeAndName temp_columns; - for (auto const & name_and_type : column_names_and_types) - temp_columns.emplace_back(name_and_type.type, name_and_type.name); - - block = Block(std::move(temp_columns)); - MutableColumns columns = block.mutateColumns(); - auto thread_group = CurrentThread::getGroup(); - auto const current_thread_id = CurrentThread::get().thread_id; - std::vector snapshots; - ThreadIdToCountersSnapshot new_snapshots; - ProfileEventsSnapshot group_snapshot; - { - auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); - snapshots.reserve(stats.size()); - - for (auto & stat : stats) - { - auto const thread_id = stat.thread_id; - if (thread_id == current_thread_id) - continue; - auto current_time = time(nullptr); - auto previous_snapshot = last_sent_snapshots.find(thread_id); - auto increment = - previous_snapshot != last_sent_snapshots.end() - ? CountersIncrement(stat.counters, previous_snapshot->second) - : CountersIncrement(stat.counters); - snapshots.push_back(ProfileEventsSnapshot{ - thread_id, - std::move(increment), - stat.memory_usage, - current_time - }); - new_snapshots[thread_id] = std::move(stat.counters); - } - - group_snapshot.thread_id = 0; - group_snapshot.current_time = time(nullptr); - group_snapshot.memory_usage = thread_group->memory_tracker.get(); - auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); - auto prev_group_snapshot = last_sent_snapshots.find(0); - group_snapshot.counters = - prev_group_snapshot != last_sent_snapshots.end() - ? 
CountersIncrement(group_counters, prev_group_snapshot->second) - : CountersIncrement(group_counters); - new_snapshots[0] = std::move(group_counters); - } - last_sent_snapshots = std::move(new_snapshots); - - const String server_display_name = "localhost"; - for (auto & snapshot : snapshots) - { - dumpProfileEvents(snapshot, columns, server_display_name); - dumpMemoryTracker(snapshot, columns, server_display_name); - } - dumpProfileEvents(group_snapshot, columns, server_display_name); - dumpMemoryTracker(group_snapshot, columns, server_display_name); - - MutableColumns logs_columns; - Block curr_block; - size_t rows = 0; - - for (; state->profile_queue->tryPop(curr_block); ++rows) - { - auto curr_columns = curr_block.getColumns(); - for (size_t j = 0; j < curr_columns.size(); ++j) - columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); - } + ProfileEvents::getProfileEvents("local", state->profile_queue, block, last_sent_snapshots); } void LocalConnection::sendQuery( diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 173df507c65..6961d70529e 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -1,6 +1,7 @@ #include "ProfileEventsExt.h" #include #include +#include #include #include #include @@ -46,8 +47,8 @@ void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, offsets.push_back(offsets.back() + size); } - -void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +/// Add records about provided non-zero ProfileEvents::Counters. +static void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) { size_t rows = 0; auto & name_column = columns[NAME_COLUMN_INDEX]; @@ -76,7 +77,7 @@ void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumn } } -void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) { { size_t i = 0; @@ -90,4 +91,91 @@ void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumn } } +void getProfileEvents( + const String & server_display_name, + DB::InternalProfileEventsQueuePtr profile_queue, + DB::Block & block, + ThreadIdToCountersSnapshot & last_sent_snapshots) +{ + using namespace DB; + static const NamesAndTypesList column_names_and_types = { + {"host_name", std::make_shared()}, + {"current_time", std::make_shared()}, + {"thread_id", std::make_shared()}, + {"type", TypeEnum}, + {"name", std::make_shared()}, + {"value", std::make_shared()}, + }; + + ColumnsWithTypeAndName temp_columns; + for (auto const & name_and_type : column_names_and_types) + temp_columns.emplace_back(name_and_type.type, name_and_type.name); + + block = std::move(temp_columns); + MutableColumns columns = block.mutateColumns(); + auto thread_group = CurrentThread::getGroup(); + auto const current_thread_id = CurrentThread::get().thread_id; + std::vector snapshots; + ThreadIdToCountersSnapshot new_snapshots; + ProfileEventsSnapshot group_snapshot; + { + auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); + snapshots.reserve(stats.size()); + + for (auto & stat : stats) + { + auto const thread_id = stat.thread_id; + if (thread_id == current_thread_id) + continue; + auto current_time = time(nullptr); + 
auto previous_snapshot = last_sent_snapshots.find(thread_id); + auto increment = + previous_snapshot != last_sent_snapshots.end() + ? CountersIncrement(stat.counters, previous_snapshot->second) + : CountersIncrement(stat.counters); + snapshots.push_back(ProfileEventsSnapshot{ + thread_id, + std::move(increment), + stat.memory_usage, + current_time + }); + new_snapshots[thread_id] = std::move(stat.counters); + } + + group_snapshot.thread_id = 0; + group_snapshot.current_time = time(nullptr); + group_snapshot.memory_usage = thread_group->memory_tracker.get(); + auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); + auto prev_group_snapshot = last_sent_snapshots.find(0); + group_snapshot.counters = + prev_group_snapshot != last_sent_snapshots.end() + ? CountersIncrement(group_counters, prev_group_snapshot->second) + : CountersIncrement(group_counters); + new_snapshots[0] = std::move(group_counters); + } + last_sent_snapshots = std::move(new_snapshots); + + for (auto & snapshot : snapshots) + { + dumpProfileEvents(snapshot, columns, server_display_name); + dumpMemoryTracker(snapshot, columns, server_display_name); + } + dumpProfileEvents(group_snapshot, columns, server_display_name); + dumpMemoryTracker(group_snapshot, columns, server_display_name); + + Block curr_block; + size_t rows = 0; + + for (; profile_queue->tryPop(curr_block); ++rows) + { + auto curr_columns = curr_block.getColumns(); + for (size_t j = 0; j < curr_columns.size(); ++j) + columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); + } + + bool empty = columns[0]->empty(); + if (!empty) + block.setColumns(std::move(columns)); +} + } diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index ebb6981405f..7d9fc512d15 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include @@ -23,10 +24,11 @@ using ThreadIdToCountersSnapshot = std::unordered_map #include #include -#include #include #include #include @@ -855,93 +854,13 @@ void TCPHandler::sendExtremes(const Block & extremes) void TCPHandler::sendProfileEvents() { - using namespace ProfileEvents; - if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) return; - static const NamesAndTypesList column_names_and_types = { - { "host_name", std::make_shared() }, - { "current_time", std::make_shared() }, - { "thread_id", std::make_shared() }, - { "type", TypeEnum }, - { "name", std::make_shared() }, - { "value", std::make_shared() }, - }; - - ColumnsWithTypeAndName temp_columns; - for (auto const & name_and_type : column_names_and_types) - temp_columns.emplace_back(name_and_type.type, name_and_type.name); - - Block block(std::move(temp_columns)); - - MutableColumns columns = block.mutateColumns(); - auto thread_group = CurrentThread::getGroup(); - auto const current_thread_id = CurrentThread::get().thread_id; - std::vector snapshots; - ThreadIdToCountersSnapshot new_snapshots; - ProfileEventsSnapshot group_snapshot; + Block block; + ProfileEvents::getProfileEvents(server_display_name, state.profile_queue, block, last_sent_snapshots); + if (!!block.rows()) { - auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); - snapshots.reserve(stats.size()); - - for (auto & stat : stats) - { - auto const thread_id = stat.thread_id; - if (thread_id == current_thread_id) - continue; - auto current_time = time(nullptr); - auto previous_snapshot = 
last_sent_snapshots.find(thread_id); - auto increment = - previous_snapshot != last_sent_snapshots.end() - ? CountersIncrement(stat.counters, previous_snapshot->second) - : CountersIncrement(stat.counters); - snapshots.push_back(ProfileEventsSnapshot{ - thread_id, - std::move(increment), - stat.memory_usage, - current_time - }); - new_snapshots[thread_id] = std::move(stat.counters); - } - - group_snapshot.thread_id = 0; - group_snapshot.current_time = time(nullptr); - group_snapshot.memory_usage = thread_group->memory_tracker.get(); - auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); - auto prev_group_snapshot = last_sent_snapshots.find(0); - group_snapshot.counters = - prev_group_snapshot != last_sent_snapshots.end() - ? CountersIncrement(group_counters, prev_group_snapshot->second) - : CountersIncrement(group_counters); - new_snapshots[0] = std::move(group_counters); - } - last_sent_snapshots = std::move(new_snapshots); - - for (auto & snapshot : snapshots) - { - dumpProfileEvents(snapshot, columns, server_display_name); - dumpMemoryTracker(snapshot, columns, server_display_name); - } - dumpProfileEvents(group_snapshot, columns, server_display_name); - dumpMemoryTracker(group_snapshot, columns, server_display_name); - - MutableColumns logs_columns; - Block curr_block; - size_t rows = 0; - - for (; state.profile_queue->tryPop(curr_block); ++rows) - { - auto curr_columns = curr_block.getColumns(); - for (size_t j = 0; j < curr_columns.size(); ++j) - columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); - } - - bool empty = columns[0]->empty(); - if (!empty) - { - block.setColumns(std::move(columns)); - initProfileEventsBlockOutput(block); writeVarUInt(Protocol::Server::ProfileEvents, *out); From 5ed41bda9b824d8de499e478a35cf3097ecde99e Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 1 Mar 2022 17:20:32 +0800 Subject: [PATCH 19/87] fixed code style --- src/Storages/Hive/HiveCommon.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index ac4abfb0f3d..a9d0c22d6a5 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -259,8 +259,8 @@ std::shared_ptr HiveMetastoreClientFactory::createThr socket->setConnTimeout(hive_metastore_client_conn_timeout_ms); socket->setRecvTimeout(hive_metastore_client_recv_timeout_ms); socket->setSendTimeout(hive_metastore_client_send_timeout_ms); - std::shared_ptr transport(new TBufferedTransport(socket)); - std::shared_ptr protocol(new TBinaryProtocol(transport)); + std::shared_ptr transport = std::make_shared(socket); + std::shared_ptr protocol = std::make_shared(transport); std::shared_ptr thrift_client = std::make_shared(protocol); try { @@ -268,7 +268,7 @@ std::shared_ptr HiveMetastoreClientFactory::createThr } catch (TException & tx) { - throw Exception("connect to hive metastore:" + name + " failed." + tx.what(), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "connect to hive metastore: {} failed. 
{}", name, tx.what()); } return thrift_client; } From ca470e1b94aae83d9bf6edd0bb7aa7f58bbaecb8 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 1 Mar 2022 19:01:39 +0800 Subject: [PATCH 20/87] lazy initialization about getting hive metadata in HiveStorage --- src/Storages/Hive/StorageHive.cpp | 19 +++++++++++++++---- src/Storages/Hive/StorageHive.h | 5 +++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 3040ad23283..63445c4a24c 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -286,14 +286,22 @@ StorageHive::StorageHive( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment_); setInMemoryMetadata(storage_metadata); +} + +void StorageHive::lazyInitialize() +{ + std::lock_guard lock{init_mutex}; + if (has_initialized) + return; + auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext()); - auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table); + auto hive_table_metadata = hive_metastore_client->getHiveTable(hive_database, hive_table); - hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->getTable()->sd.location); - table_schema = hive_table_metadata->getTable()->sd.cols; + hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->sd.location); + table_schema = hive_table_metadata->sd.cols; - FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->getTable()->sd.inputFormat); + FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->sd.inputFormat); switch (hdfs_file_format) { case FileFormat::TEXT: @@ -331,6 +339,7 @@ StorageHive::StorageHive( } initMinMaxIndexExpression(); + has_initialized = true; } void StorageHive::initMinMaxIndexExpression() @@ -552,6 +561,8 @@ Pipe StorageHive::read( size_t max_block_size, unsigned num_streams) { + lazyInitialize(); + HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, context_->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext()); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 9629629e057..f6968ebb7c6 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -94,6 +94,9 @@ private: String hive_database; String hive_table; + std::mutex init_mutex; + bool has_initialized = false; + /// Hive table meta std::vector table_schema; Names text_input_field_names; /// Defines schema of hive file, only used when text input format is TEXT @@ -116,6 +119,8 @@ private: std::shared_ptr storage_settings; Poco::Logger * log = &Poco::Logger::get("StorageHive"); + + void lazyInitialize(); }; } From 755e63ed0387aeca5726096895f6b4d0dd36d482 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 1 Mar 2022 19:21:59 +0100 Subject: [PATCH 21/87] Keep compatibility --- src/Storages/FileLog/StorageFileLog.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 604116808ab..48a729febb2 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -99,8 +99,18 @@ void StorageFileLog::loadMetaFiles(bool attach) /// Attach table if (attach) { + const auto & storage = getStorageID(); + + auto metadata_path_exist = 
std::filesystem::exists(metadata_base_path); + auto previous_path = std::filesystem::path(getContext()->getPath()) / ".filelog_storage_metadata" / storage.getDatabaseName() / storage.getTableName(); + + /// For compatibility with the previous path version. + if (std::filesystem::exists(previous_path) && !metadata_path_exist) + { + std::filesystem::copy(previous_path, metadata_base_path, std::filesystem::copy_options::recursive); + } /// Meta file may lost, log and create directory - if (!std::filesystem::exists(metadata_base_path)) + else if (!metadata_path_exist) { /// Create metadata_base_path directory when store meta data LOG_ERROR(log, "Metadata files of table {} are lost.", getStorageID().getTableName()); From 2ff7ff90fa6f6c4ad4f6bdb88eb755137ba10a85 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 2 Mar 2022 14:17:04 +0800 Subject: [PATCH 22/87] update tests --- .../test/integration/hive_server/Dockerfile | 5 +- .../hive_server/http_api_server.py | 72 +++++++++++++++++++ docker/test/integration/hive_server/start.sh | 3 +- 3 files changed, 77 insertions(+), 3 deletions(-) create mode 100644 docker/test/integration/hive_server/http_api_server.py diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index fa6e4bf6313..391f9a5e22f 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -42,6 +42,9 @@ COPY prepare_hive_data.sh / COPY demo_data.txt / ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH - +RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format +RUN apt install -y python3 python3-pip +RUN pip3 install flask requests +COPY http_api_server.py / COPY start.sh / diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py new file mode 100644 index 00000000000..74b511a3a35 --- /dev/null +++ b/docker/test/integration/hive_server/http_api_server.py @@ -0,0 +1,72 @@ +import os +import subprocess +import datetime +from flask import Flask, flash, request, redirect, url_for + +def run_command(command, wait=False): + print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) + lines = [] + p = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True) + if wait: + for l in iter(p.stdout.readline, b''): + lines.append(l) + p.poll() + return (lines, p.returncode) + else: + return(iter(p.stdout.readline, b''), 0) + + +UPLOAD_FOLDER = './' +ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'sh'} +app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER + +@app.route('/') +def hello_world(): + return 'Hello World' + + +def allowed_file(filename): + return '.' in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.route('/upload', methods=['GET', 'POST']) +def upload_file(): + if request.method == 'POST': + # check if the post request has the file part + if 'file' not in request.files: + flash('No file part') + return redirect(request.url) + file = request.files['file'] + # If the user does not select a file, the browser submits an + # empty file without a filename. 
+ if file.filename == '': + flash('No selected file') + return redirect(request.url) + if file and allowed_file(file.filename): + filename = file.filename + file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) + return redirect(url_for('upload_file', name=filename)) + return ''' + + Upload new File +

<h1>Upload new File</h1>
+    <form method=post enctype=multipart/form-data>
+      <input type=file name=file>
+      <input type=submit value=Upload>
+    </form>
+ ''' +@app.route('/run', methods=['GET', 'POST']) +def parse_request(): + data = request.data # data is empty + print(data) + run_command(data, wait=True) + return 'Ok' + # need posted data here + +if __name__ == '__main__': + app.run(port=5011) diff --git a/docker/test/integration/hive_server/start.sh b/docker/test/integration/hive_server/start.sh index e01f28542af..4224b8126e6 100755 --- a/docker/test/integration/hive_server/start.sh +++ b/docker/test/integration/hive_server/start.sh @@ -1,6 +1,5 @@ service ssh start sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml -hadoop namenode -format start-all.sh service mysql start mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\"" @@ -9,4 +8,4 @@ schematool -initSchema -dbType mysql #nohup hiveserver2 & nohup hive --service metastore & bash /prepare_hive_data.sh -while true; do sleep 1000; done +python3 http_api_server.py From 21c164ad2c63d8d73f7bc78e173d7a26034524fe Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 2 Mar 2022 14:20:59 +0800 Subject: [PATCH 23/87] update tests --- docker/test/integration/hive_server/http_api_server.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py index 74b511a3a35..4818b785c89 100644 --- a/docker/test/integration/hive_server/http_api_server.py +++ b/docker/test/integration/hive_server/http_api_server.py @@ -20,7 +20,7 @@ def run_command(command, wait=False): UPLOAD_FOLDER = './' -ALLOWED_EXTENSIONS = {'txt', 'pdf', 'png', 'jpg', 'jpeg', 'gif', 'sh'} +ALLOWED_EXTENSIONS = {'txt', 'sh'} app = Flask(__name__) app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER @@ -63,10 +63,8 @@ def upload_file(): @app.route('/run', methods=['GET', 'POST']) def parse_request(): data = request.data # data is empty - print(data) run_command(data, wait=True) return 'Ok' - # need posted data here if __name__ == '__main__': app.run(port=5011) From ef344a581b213eaa0ab1a4d43583cf1892ca12d5 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 2 Mar 2022 15:28:17 +0100 Subject: [PATCH 24/87] Fix bug in FileLog storage --- src/Storages/FileLog/StorageFileLog.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 48a729febb2..07853677222 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -837,6 +837,9 @@ void registerStorageFileLog(StorageFactory & factory) bool StorageFileLog::updateFileInfos() { + if (file_infos.file_names.empty()) + return false; + if (!directory_watch) { /// For table just watch one file, we can not use directory monitor to watch it From 74ff3ee4bcf28796a33c81f55518ea9769ccfff2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 Mar 2022 20:02:02 +0100 Subject: [PATCH 25/87] Add sanity check for localhost --- src/Common/isLocalAddress.cpp | 28 ++++++++++++++++--------- src/Common/isLocalAddress.h | 1 + src/Coordination/KeeperStateManager.cpp | 27 ++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 10 deletions(-) diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index d79e4cebd15..40907ddbe04 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -79,17 +79,8 @@ struct NetworkInterfaces } -bool isLocalAddress(const Poco::Net::IPAddress & address) +bool isLoopback(const Poco::Net::IPAddress & address) { - /** 
127.0.0.1 is treat as local address unconditionally. - * ::1 is also treat as local address unconditionally. - * - * 127.0.0.{2..255} are not treat as local addresses, because they are used in tests - * to emulate distributed queries across localhost. - * - * But 127.{0,1}.{0,1}.{0,1} are treat as local addresses, - * because they are used in Debian for localhost. - */ if (address.isLoopback()) { if (address.family() == Poco::Net::AddressFamily::IPv4) @@ -111,6 +102,23 @@ bool isLocalAddress(const Poco::Net::IPAddress & address) } } + return false; +} + +bool isLocalAddress(const Poco::Net::IPAddress & address) +{ + /** 127.0.0.1 is treat as local address unconditionally. + * ::1 is also treat as local address unconditionally. + * + * 127.0.0.{2..255} are not treat as local addresses, because they are used in tests + * to emulate distributed queries across localhost. + * + * But 127.{0,1}.{0,1}.{0,1} are treat as local addresses, + * because they are used in Debian for localhost. + */ + if (isLoopback(address)) + return true; + NetworkInterfaces interfaces; return interfaces.hasAddress(address); } diff --git a/src/Common/isLocalAddress.h b/src/Common/isLocalAddress.h index 42977775b09..9cae0e0460f 100644 --- a/src/Common/isLocalAddress.h +++ b/src/Common/isLocalAddress.h @@ -25,6 +25,7 @@ namespace DB bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port); bool isLocalAddress(const Poco::Net::SocketAddress & address); bool isLocalAddress(const Poco::Net::IPAddress & address); + bool isLoopback(const Poco::Net::IPAddress & address); /// Returns number of different bytes in hostnames, used for load balancing size_t getHostNameDifference(const std::string & local_hostname, const std::string & host); diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index c2d4274f972..d646695cda3 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { @@ -12,6 +14,16 @@ namespace ErrorCodes extern const int RAFT_ERROR; } +namespace +{ + +bool isLocalhost(const std::string & hostname) +{ + return isLoopback(DNSResolver::instance().resolveHost(hostname)); +} + +} + KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const { KeeperConfigurationWrapper result; @@ -25,6 +37,8 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC std::unordered_map check_duplicated_hostnames; size_t total_servers = 0; + std::string local_hostname; + std::string non_local_hostname; for (const auto & server_key : keys) { if (!startsWith(server_key, "server")) @@ -38,6 +52,11 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC int32_t priority = config.getInt(full_prefix + ".priority", 1); bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false); + if (isLocalhost(hostname)) + local_hostname = hostname; + else + non_local_hostname = hostname; + if (start_as_follower) result.servers_start_as_followers.insert(new_server_id); @@ -77,6 +96,14 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC if (result.servers_start_as_followers.size() == total_servers) throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without )"); + if (!local_hostname.empty() && 
!non_local_hostname.empty()) + { + throw Exception( + ErrorCodes::RAFT_ERROR, + "Mixing local and non-local hostnames ('{}' and '{}') in raft_configuration is not allowed. Different hosts can resolve it to themselves so it's not allowed.", + local_hostname, non_local_hostname); + } + return result; } From 5fa7cc6e1821824e739a390387620569c5512d1f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 2 Mar 2022 19:04:14 +0000 Subject: [PATCH 26/87] Fix test. --- tests/queries/0_stateless/01275_parallel_mv.reference | 8 ++++---- tests/queries/0_stateless/01275_parallel_mv.sql | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a5987acafde..9021ae2bb1a 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -2,8 +2,8 @@ set parallel_view_processing=1; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; -8 +select length(thread_ids) >= 8 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; +1 select count() from testX; 10 select count() from testXA; @@ -15,8 +15,8 @@ select count() from testXC; set parallel_view_processing=0; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; -5 +select length(thread_ids) >= 5 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; +1 select count() from testX; 20 select count() from testXA; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql b/tests/queries/0_stateless/01275_parallel_mv.sql index 32b43ce616f..d1fa8bbcae6 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql +++ b/tests/queries/0_stateless/01275_parallel_mv.sql @@ -13,7 +13,7 @@ create materialized view testXC engine=MergeTree order by tuple() as select slee set parallel_view_processing=1; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; +select length(thread_ids) >= 8 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; select count() from testX; select count() from testXA; @@ -23,7 +23,7 @@ select count() from testXC; set parallel_view_processing=0; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from 
system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; +select length(thread_ids) >= 5 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; select count() from testX; select count() from testXA; From 04d4c52e01ed92512d933193fce2f250346a91ee Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 2 Mar 2022 20:37:59 +0100 Subject: [PATCH 27/87] Add sanity checks for keeper --- src/Coordination/KeeperStateManager.cpp | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index d646695cda3..066dc0d63e8 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -24,6 +24,12 @@ bool isLocalhost(const std::string & hostname) } +/// this function quite long because contains a lot of sanity checks in config: +/// 1. No duplicate endpoints +/// 2. No "localhost" or "127.0.0.1" or another local addresses mixed with normal addresses +/// 3. Raft internal port is equal to client port +/// 4. No duplicate IDs +/// 5. Our ID present in hostnames list KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const { KeeperConfigurationWrapper result; @@ -31,6 +37,13 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix + ".raft_configuration", keys); + std::unordered_set client_ports; + if (config.has(config_prefix + ".tcp_port")) + client_ports.insert(config.getUInt64(config_prefix + ".tcp_port")); + + if (config.has(config_prefix + ".tcp_port_secure")) + client_ports.insert(config.getUInt64(config_prefix + ".tcp_port_secure")); + /// Sometimes (especially in cloud envs) users can provide incorrect /// configuration with duplicated raft ids or endpoints. We check them /// on config parsing stage and never commit to quorum. 
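The sanity checks enumerated in the new comment above boil down to a handful of set-membership tests over the parsed server list. The following is a compressed, self-contained sketch of those rules for illustration only; RaftServer and the string-based loopback test are stand-ins for the Poco configuration keys and the DNSResolver-based isLocalhost() used in the patch.

    /// Sketch only, not part of the patch: the five raft_configuration
    /// sanity checks condensed into one standalone function.
    #include <set>
    #include <stdexcept>
    #include <string>
    #include <vector>

    struct RaftServer { int id; std::string host; unsigned port; };

    /// Rough stand-in for isLoopback(DNSResolver::instance().resolveHost(host)).
    static bool looksLocal(const std::string & host)
    {
        return host == "localhost" || host == "::1" || host.rfind("127.", 0) == 0;
    }

    void validateRaftConfig(int our_id, const std::vector<RaftServer> & servers, const std::set<unsigned> & client_ports)
    {
        std::set<std::string> endpoints;
        std::set<int> ids;
        bool has_local = false;
        bool has_remote = false;

        for (const auto & s : servers)
        {
            /// 3. The raft port must not reuse a client port (tcp_port / tcp_port_secure).
            if (client_ports.count(s.port))
                throw std::runtime_error("raft port equals client port: " + std::to_string(s.port));

            /// 1. No duplicate endpoints.
            if (!endpoints.insert(s.host + ":" + std::to_string(s.port)).second)
                throw std::runtime_error("duplicate endpoint: " + s.host);

            /// 4. No duplicate server ids.
            if (!ids.insert(s.id).second)
                throw std::runtime_error("duplicate id: " + std::to_string(s.id));

            /// 2. Remember whether local and non-local hostnames are mixed.
            if (looksLocal(s.host))
                has_local = true;
            else
                has_remote = true;
        }

        if (has_local && has_remote)
            throw std::runtime_error("local and non-local hostnames mixed in raft_configuration");

        /// 5. Our own id must appear in the server list.
        if (!ids.count(our_id))
            throw std::runtime_error("own server id missing from raft_configuration");
    }

    int main()
    {
        /// Hypothetical two-node config: distinct ids and endpoints, no client-port clash.
        validateRaftConfig(1, {{1, "keeper1.example", 9234}, {2, "keeper2.example", 9234}}, {9181});
        return 0;
    }

A caller would fill the server list from the raft_configuration entries and the client ports from tcp_port/tcp_port_secure, which is what parseServersConfiguration does while walking the configuration keys.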
@@ -52,6 +65,12 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC int32_t priority = config.getInt(full_prefix + ".priority", 1); bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false); + if (client_ports.contains(port)) + { + throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contains hostname '{}' with port '{}' which is equal to client port on current machine", + hostname, port); + } + if (isLocalhost(hostname)) local_hostname = hostname; else @@ -63,7 +82,7 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC auto endpoint = hostname + ":" + std::to_string(port); if (check_duplicated_hostnames.count(endpoint)) { - throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contain duplicate endpoints: " + throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contains duplicate endpoints: " "endpoint {} has been already added with id {}, but going to add it one more time with id {}", endpoint, check_duplicated_hostnames[endpoint], new_server_id); } @@ -73,7 +92,7 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC for (const auto & [id_endpoint, id] : check_duplicated_hostnames) { if (new_server_id == id) - throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contain duplicate ids: id {} has been already added with endpoint {}, " + throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contains duplicate ids: id {} has been already added with endpoint {}, " "but going to add it one more time with endpoint {}", id, id_endpoint, endpoint); } check_duplicated_hostnames.emplace(endpoint, new_server_id); From 9ee0d2c8a0af81ffbea40f6f1b631cd53d2cc3e2 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Thu, 3 Mar 2022 14:30:22 +0800 Subject: [PATCH 28/87] keeper atomic stat --- src/Coordination/FourLetterCommand.cpp | 6 +-- src/Coordination/KeeperConnectionStats.cpp | 52 +++++++++++----------- src/Coordination/KeeperConnectionStats.h | 20 +++++---- src/Coordination/KeeperDispatcher.cpp | 1 - src/Coordination/KeeperDispatcher.h | 7 +-- src/Server/KeeperTCPHandler.cpp | 25 +++-------- src/Server/KeeperTCPHandler.h | 3 +- 7 files changed, 48 insertions(+), 66 deletions(-) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index 4c76d052f9b..feeec222c0f 100644 --- a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -202,7 +202,7 @@ void print(IFourLetterCommand::StringBuffer & buf, const String & key, uint64_t String MonitorCommand::run() { - KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats(); + auto & stats = keeper_dispatcher.getKeeperConnectionStats(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); if (!keeper_info.has_leader) @@ -288,7 +288,7 @@ String ServerStatCommand::run() writeText('\n', buf); }; - KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats(); + auto & stats = keeper_dispatcher.getKeeperConnectionStats(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); write("ClickHouse Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH); @@ -314,7 +314,7 @@ String StatCommand::run() auto write = [&buf] (const String & key, const String & value) { buf << key << ": " << value << '\n'; }; - KeeperConnectionStats stats = keeper_dispatcher.getKeeperConnectionStats(); + auto & stats = keeper_dispatcher.getKeeperConnectionStats(); Keeper4LWInfo keeper_info = keeper_dispatcher.getKeeper4LWInfo(); write("ClickHouse 
Keeper version", String(VERSION_DESCRIBE) + "-" + VERSION_GITHASH); diff --git a/src/Coordination/KeeperConnectionStats.cpp b/src/Coordination/KeeperConnectionStats.cpp index 1e9f2c051a7..26455de25f5 100644 --- a/src/Coordination/KeeperConnectionStats.cpp +++ b/src/Coordination/KeeperConnectionStats.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB @@ -5,61 +6,58 @@ namespace DB uint64_t KeeperConnectionStats::getMinLatency() const { - return min_latency; + return min_latency.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getMaxLatency() const { - return max_latency; + return max_latency.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getAvgLatency() const { - if (count != 0) - return total_latency / count; + auto cnt = count.load(std::memory_order_relaxed); + if (cnt) + return total_latency.load(std::memory_order_relaxed) / cnt; return 0; } uint64_t KeeperConnectionStats::getLastLatency() const { - return last_latency; + return last_latency.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getPacketsReceived() const { - return packets_received; + return packets_received.load(std::memory_order_relaxed); } uint64_t KeeperConnectionStats::getPacketsSent() const { - return packets_sent; + return packets_sent.load(std::memory_order_relaxed); } void KeeperConnectionStats::incrementPacketsReceived() { - packets_received++; + packets_received.fetch_add(1, std::memory_order_relaxed); } void KeeperConnectionStats::incrementPacketsSent() { - packets_sent++; + packets_sent.fetch_add(1, std::memory_order_relaxed); } void KeeperConnectionStats::updateLatency(uint64_t latency_ms) { - last_latency = latency_ms; - total_latency += (latency_ms); - count++; + last_latency.store(latency_ms, std::memory_order_relaxed); + total_latency.fetch_add(latency_ms, std::memory_order_relaxed); + count.fetch_add(1, std::memory_order_relaxed); - if (latency_ms < min_latency) - { - min_latency = latency_ms; - } + if (latency_ms < min_latency.load(std::memory_order_relaxed)) + min_latency.store(latency_ms, std::memory_order_relaxed); - if (latency_ms > max_latency) - { - max_latency = latency_ms; - } + if (latency_ms > max_latency.load(std::memory_order_relaxed)) + max_latency.store(latency_ms, std::memory_order_relaxed); } void KeeperConnectionStats::reset() @@ -70,17 +68,17 @@ void KeeperConnectionStats::reset() void KeeperConnectionStats::resetLatency() { - total_latency = 0; - count = 0; - max_latency = 0; - min_latency = 0; - last_latency = 0; + total_latency.store(0, std::memory_order_relaxed); + count.store(0, std::memory_order_relaxed); + max_latency.store(0, std::memory_order_relaxed); + min_latency.store(0, std::memory_order_relaxed); + last_latency.store(0, std::memory_order_relaxed); } void KeeperConnectionStats::resetRequestCounters() { - packets_received = 0; - packets_sent = 0; + packets_received.store(0, std::memory_order_relaxed); + packets_sent.store(0, std::memory_order_relaxed); } } diff --git a/src/Coordination/KeeperConnectionStats.h b/src/Coordination/KeeperConnectionStats.h index 3cd881d553b..ff276def0c9 100644 --- a/src/Coordination/KeeperConnectionStats.h +++ b/src/Coordination/KeeperConnectionStats.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -11,7 +12,10 @@ namespace DB class KeeperConnectionStats { public: - KeeperConnectionStats() = default; + KeeperConnectionStats() + { + reset(); + } uint64_t getMinLatency() const; uint64_t getMaxLatency() const; @@ -33,20 +37,20 @@ private: void resetRequestCounters(); /// 
all response with watch response included - uint64_t packets_sent = 0; + std::atomic_uint64_t packets_sent; /// All user requests - uint64_t packets_received = 0; + std::atomic_uint64_t packets_received; /// For consistent with zookeeper measured by millisecond, /// otherwise maybe microsecond is better - uint64_t total_latency = 0; - uint64_t max_latency = 0; - uint64_t min_latency = 0; + std::atomic_uint64_t total_latency; + std::atomic_uint64_t max_latency; + std::atomic_uint64_t min_latency; /// last operation latency - uint64_t last_latency = 0; + std::atomic_uint64_t last_latency; - uint64_t count = 0; + std::atomic_uint64_t count; }; } diff --git a/src/Coordination/KeeperDispatcher.cpp b/src/Coordination/KeeperDispatcher.cpp index 86e22b834d4..a4dcb0acc52 100644 --- a/src/Coordination/KeeperDispatcher.cpp +++ b/src/Coordination/KeeperDispatcher.cpp @@ -594,7 +594,6 @@ void KeeperDispatcher::updateConfiguration(const Poco::Util::AbstractConfigurati void KeeperDispatcher::updateKeeperStatLatency(uint64_t process_time_ms) { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.updateLatency(process_time_ms); } diff --git a/src/Coordination/KeeperDispatcher.h b/src/Coordination/KeeperDispatcher.h index 646fab5d80f..cc680f8d78f 100644 --- a/src/Coordination/KeeperDispatcher.h +++ b/src/Coordination/KeeperDispatcher.h @@ -68,7 +68,6 @@ private: /// RAFT wrapper. std::unique_ptr server; - mutable std::mutex keeper_stats_mutex; KeeperConnectionStats keeper_stats; KeeperConfigurationAndSettingsPtr configuration_and_settings; @@ -159,9 +158,8 @@ public: uint64_t getSnapDirSize() const; /// Request statistics such as qps, latency etc. - KeeperConnectionStats getKeeperConnectionStats() const + KeeperConnectionStats & getKeeperConnectionStats() { - std::lock_guard lock(keeper_stats_mutex); return keeper_stats; } @@ -179,19 +177,16 @@ public: void incrementPacketsSent() { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.incrementPacketsSent(); } void incrementPacketsReceived() { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.incrementPacketsReceived(); } void resetConnectionStats() { - std::lock_guard lock(keeper_stats_mutex); keeper_stats.reset(); } }; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 07964c29577..3e354cfd18f 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -544,19 +544,13 @@ std::pair KeeperTCPHandler::receiveReque void KeeperTCPHandler::packageSent() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.incrementPacketsSent(); - } + conn_stats.incrementPacketsSent(); keeper_dispatcher->incrementPacketsSent(); } void KeeperTCPHandler::packageReceived() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.incrementPacketsReceived(); - } + conn_stats.incrementPacketsReceived(); keeper_dispatcher->incrementPacketsReceived(); } @@ -566,10 +560,7 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) { Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000; - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.updateLatency(elapsed); - } + conn_stats.updateLatency(elapsed); operations.erase(response->xid); keeper_dispatcher->updateKeeperStatLatency(elapsed); @@ -584,15 +575,14 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response } -KeeperConnectionStats KeeperTCPHandler::getConnectionStats() 
const +KeeperConnectionStats & KeeperTCPHandler::getConnectionStats() { - std::lock_guard lock(conn_stats_mutex); return conn_stats; } void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) { - KeeperConnectionStats stats = getConnectionStats(); + auto & stats = getConnectionStats(); writeText(' ', buf); writeText(socket().peerAddress().toString(), buf); @@ -641,10 +631,7 @@ void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) void KeeperTCPHandler::resetStats() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.reset(); - } + conn_stats.reset(); last_op.set(std::make_unique(EMPTY_LAST_OP)); } diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 7953dfd2cbe..b8cccafeca5 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -51,7 +51,7 @@ public: KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_); void run() override; - KeeperConnectionStats getConnectionStats() const; + KeeperConnectionStats & getConnectionStats(); void dumpStats(WriteBufferFromOwnString & buf, bool brief); void resetStats(); @@ -100,7 +100,6 @@ private: LastOpMultiVersion last_op; - mutable std::mutex conn_stats_mutex; KeeperConnectionStats conn_stats; }; From f24fa161845a5b910238c12e328021d4ff8908c3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Mar 2022 11:29:43 +0100 Subject: [PATCH 29/87] Fix address check --- src/Coordination/KeeperStateManager.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 066dc0d63e8..b24fa6cdb5e 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -19,7 +19,15 @@ namespace bool isLocalhost(const std::string & hostname) { - return isLoopback(DNSResolver::instance().resolveHost(hostname)); + try + { + return isLoopback(DNSResolver::instance().resolveHost(hostname)); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + return false; } } From 33baefb12bcd4b56ce60ed5d20d03e9f843c174a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Thu, 3 Mar 2022 14:35:52 +0100 Subject: [PATCH 30/87] Done --- src/Common/Config/ConfigProcessor.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index b86e8ed3e40..1070430b842 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -664,6 +664,10 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, new_path.erase(0, main_config_path.size()); std::replace(new_path.begin(), new_path.end(), '/', '_'); + /// If we have config file in YAML format, the preprocessed config will inherit .yaml extension + /// but will contain config in XML format, so some tools like clickhouse extract-from-config won't work + new_path = fs::path(new_path).replace_extension(".xml").string(); + if (preprocessed_dir.empty()) { if (!loaded_config.configuration->has("path")) From c33808d3db3ad9b637578e17ef365cc9de4e64e7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 3 Mar 2022 15:18:03 +0100 Subject: [PATCH 31/87] Fix read old records from logs --- src/Coordination/KeeperStateMachine.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index e3d99d4775b..a9b5fc85828 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -39,7 +39,9 @@ namespace request_for_session.request->xid = xid; request_for_session.request->readImpl(buffer); - readIntBinary(request_for_session.time, buffer); + if (!buffer.eof()) + readIntBinary(request_for_session.time, buffer); + return request_for_session; } } From d19f199e93ccd64fb07e19f5e0f4cc3e5ab2963f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 3 Mar 2022 15:25:27 +0100 Subject: [PATCH 32/87] Revert --- .../FileLog/ReadBufferFromFileLog.cpp | 2 +- src/Storages/FileLog/StorageFileLog.cpp | 49 ++++++------------- src/Storages/FileLog/StorageFileLog.h | 3 +- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/src/Storages/FileLog/ReadBufferFromFileLog.cpp b/src/Storages/FileLog/ReadBufferFromFileLog.cpp index ebb0925f128..86caac57a53 100644 --- a/src/Storages/FileLog/ReadBufferFromFileLog.cpp +++ b/src/Storages/FileLog/ReadBufferFromFileLog.cpp @@ -134,7 +134,7 @@ void ReadBufferFromFileLog::readNewRecords(ReadBufferFromFileLog::Records & new_ UInt64 current_position = reader.tellg(); StorageFileLog::assertStreamGood(reader); - file_meta.last_written_position = current_position; + file_meta.last_writen_position = current_position; /// stream reach to end if (current_position == file_meta.last_open_end) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 07853677222..b43f47eb6c5 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -232,12 +232,12 @@ void StorageFileLog::serialize() const } else { - checkOffsetIsValid(full_name, meta.last_written_position); + checkOffsetIsValid(full_name, meta.last_writen_position); } WriteBufferFromFile out(full_name); writeIntText(inode, out); writeChar('\n', out); - writeIntText(meta.last_written_position, out); + writeIntText(meta.last_writen_position, out); } } @@ -254,12 +254,12 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const } 
else { - checkOffsetIsValid(full_name, file_meta.last_written_position); + checkOffsetIsValid(full_name, file_meta.last_writen_position); } WriteBufferFromFile out(full_name); writeIntText(inode, out); writeChar('\n', out); - writeIntText(file_meta.last_written_position, out); + writeIntText(file_meta.last_writen_position, out); } void StorageFileLog::deserialize() @@ -294,7 +294,7 @@ void StorageFileLog::deserialize() } meta.file_name = dir_entry.path().filename(); - meta.last_written_position = last_written_pos; + meta.last_writen_position = last_written_pos; file_infos.meta_by_inode.emplace(inode, meta); } @@ -445,17 +445,7 @@ void StorageFileLog::openFilesAndSetPos() auto & file_ctx = findInMap(file_infos.context_by_name, file); if (file_ctx.status != FileStatus::NO_CHANGE) { - auto & meta = findInMap(file_infos.meta_by_inode, file_ctx.inode); - - auto current_path = getFullDataPath(file); - if (!std::filesystem::exists(file) && meta.last_written_position != 0) - { - file_ctx.status = FileStatus::REMOVED; - continue; - } - - file_ctx.reader.emplace(current_path); - + file_ctx.reader.emplace(getFullDataPath(file)); auto & reader = file_ctx.reader.value(); assertStreamGood(reader); @@ -465,24 +455,23 @@ void StorageFileLog::openFilesAndSetPos() auto file_end = reader.tellg(); assertStreamGood(reader); - if (meta.last_written_position > static_cast(file_end)) + auto & meta = findInMap(file_infos.meta_by_inode, file_ctx.inode); + if (meta.last_writen_position > static_cast(file_end)) { throw Exception( ErrorCodes::CANNOT_READ_ALL_DATA, "Last saved offsset for File {} is bigger than file size ({} > {})", file, - meta.last_written_position, + meta.last_writen_position, file_end); } /// update file end at the moment, used in ReadBuffer and serialize meta.last_open_end = file_end; - reader.seekg(meta.last_written_position); + reader.seekg(meta.last_writen_position); assertStreamGood(reader); } } - - removeInvalidFiles(); serialize(); } @@ -951,18 +940,6 @@ bool StorageFileLog::updateFileInfos() } } } - - removeInvalidFiles(); - - /// These file infos should always have same size(one for one) - assert(file_infos.file_names.size() == file_infos.meta_by_inode.size()); - assert(file_infos.file_names.size() == file_infos.context_by_name.size()); - - return events.empty() || file_infos.file_names.empty(); -} - -void StorageFileLog::removeInvalidFiles() -{ std::vector valid_files; /// Remove file infos with REMOVE status @@ -992,6 +969,12 @@ void StorageFileLog::removeInvalidFiles() } } file_infos.file_names.swap(valid_files); + + /// These file infos should always have same size(one for one) + assert(file_infos.file_names.size() == file_infos.meta_by_inode.size()); + assert(file_infos.file_names.size() == file_infos.context_by_name.size()); + + return events.empty() || file_infos.file_names.empty(); } NamesAndTypesList StorageFileLog::getVirtuals() const diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 9eaac3e8eb3..98915f10a05 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -72,7 +72,7 @@ public: struct FileMeta { String file_name; - UInt64 last_written_position = 0; + UInt64 last_writen_position = 0; UInt64 last_open_end = 0; }; @@ -205,7 +205,6 @@ private: void deserialize(); static void checkOffsetIsValid(const String & full_name, UInt64 offset); - void removeInvalidFiles(); }; } From 69b5fbed64f6246c2ebd75cb3cf8e46289e00b7a Mon Sep 17 00:00:00 2001 From: cnmade Date: Fri, 4 Mar 2022 16:37:18 +0800 
Subject: [PATCH 33/87] Translate zh/sql-reference/statements/alter/row-policy: rename old file --- .../statements/alter/{row-policy.md => row-policy.md.bak} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/zh/sql-reference/statements/alter/{row-policy.md => row-policy.md.bak} (100%) diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md.bak similarity index 100% rename from docs/zh/sql-reference/statements/alter/row-policy.md rename to docs/zh/sql-reference/statements/alter/row-policy.md.bak From 1d2707e7eae51083124c6c72618bc945b369bd08 Mon Sep 17 00:00:00 2001 From: cnmade Date: Fri, 4 Mar 2022 16:39:12 +0800 Subject: [PATCH 34/87] Translate zh/sql-reference/statements/alter/row-policy: reimport file --- .../statements/alter/row-policy.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 docs/zh/sql-reference/statements/alter/row-policy.md diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md new file mode 100644 index 00000000000..bbf9f317737 --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/row-policy.md @@ -0,0 +1,19 @@ +--- +toc_priority: 47 +toc_title: ROW POLICY +--- + +# ALTER ROW POLICY {#alter-row-policy-statement} + +Changes row policy. + +Syntax: + +``` sql +ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...] + [AS {PERMISSIVE | RESTRICTIVE}] + [FOR SELECT] + [USING {condition | NONE}][,...] + [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] +``` From 44d626af6fcdad817b24881a997eb83efe769b7c Mon Sep 17 00:00:00 2001 From: cnmade Date: Fri, 4 Mar 2022 16:41:07 +0800 Subject: [PATCH 35/87] Translate zh/sql-reference/statements/alter/row-policy: translate to zh --- docs/zh/sql-reference/statements/alter/row-policy.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md index bbf9f317737..0cdba239b84 100644 --- a/docs/zh/sql-reference/statements/alter/row-policy.md +++ b/docs/zh/sql-reference/statements/alter/row-policy.md @@ -1,13 +1,13 @@ --- toc_priority: 47 -toc_title: ROW POLICY +toc_title: 行策略 --- -# ALTER ROW POLICY {#alter-row-policy-statement} +# 操作行策略 {#alter-row-policy-statement} -Changes row policy. +修改行策略. 
-Syntax: +语法: ``` sql ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1] From 4167f82c57a06912ace1ff0047e09a919d2aee82 Mon Sep 17 00:00:00 2001 From: cnmade Date: Fri, 4 Mar 2022 16:45:39 +0800 Subject: [PATCH 36/87] Translate zh/sql-reference/statements/alter/row-policy: remove old file --- docs/zh/sql-reference/statements/alter/row-policy.md.bak | 1 - 1 file changed, 1 deletion(-) delete mode 120000 docs/zh/sql-reference/statements/alter/row-policy.md.bak diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md.bak b/docs/zh/sql-reference/statements/alter/row-policy.md.bak deleted file mode 120000 index 09ad2d301f3..00000000000 --- a/docs/zh/sql-reference/statements/alter/row-policy.md.bak +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/row-policy.md \ No newline at end of file From c836a57000c4eb36bbdee8dfda6ef22148e91577 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 4 Mar 2022 11:46:15 +0000 Subject: [PATCH 37/87] fix reading from system.asynchronous_inserts table --- .../System/StorageSystemAsynchronousInserts.cpp | 15 +++++++++++++-- .../02015_async_inserts_stress_long.sh | 10 ++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index a16b76411bb..80fc070c83a 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -62,8 +62,19 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co size_t i = 0; res_columns[i++]->insert(queryToString(insert_query)); - res_columns[i++]->insert(insert_query.table_id.getDatabaseName()); - res_columns[i++]->insert(insert_query.table_id.getTableName()); + + /// If query is "INSERT INTO FUNCTION" then table_id is empty. 
+ if (insert_query.table_id) + { + res_columns[i++]->insert(insert_query.table_id.getDatabaseName()); + res_columns[i++]->insert(insert_query.table_id.getTableName()); + } + else + { + res_columns[i++]->insertDefault(); + res_columns[i++]->insertDefault(); + } + res_columns[i++]->insert(insert_query.format); res_columns[i++]->insert(time_in_microseconds(elem->data->first_update)); res_columns[i++]->insert(time_in_microseconds(elem->data->last_update)); diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index f9a58818404..31258883194 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh @@ -24,6 +24,14 @@ function insert2() done } +function insert3() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + while true; do + ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" + done +} + function select1() { while true; do @@ -53,6 +61,7 @@ TIMEOUT=10 export -f insert1 export -f insert2 +export -f insert3 export -f select1 export -f select2 export -f truncate1 @@ -60,6 +69,7 @@ export -f truncate1 for _ in {1..5}; do timeout $TIMEOUT bash -c insert1 & timeout $TIMEOUT bash -c insert2 & + timeout $TIMEOUT bash -c insert3 & done timeout $TIMEOUT bash -c select1 & From 2ab920a404203513053ba3f2d90e7ad9b47f3f49 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Mar 2022 13:14:38 +0100 Subject: [PATCH 38/87] Review fixes --- src/Common/isLocalAddress.cpp | 28 +++++--------- src/Common/isLocalAddress.h | 1 - src/Coordination/KeeperStateManager.cpp | 51 +++++++++++++++++++++---- 3 files changed, 53 insertions(+), 27 deletions(-) diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index 40907ddbe04..d79e4cebd15 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -79,8 +79,17 @@ struct NetworkInterfaces } -bool isLoopback(const Poco::Net::IPAddress & address) +bool isLocalAddress(const Poco::Net::IPAddress & address) { + /** 127.0.0.1 is treat as local address unconditionally. + * ::1 is also treat as local address unconditionally. + * + * 127.0.0.{2..255} are not treat as local addresses, because they are used in tests + * to emulate distributed queries across localhost. + * + * But 127.{0,1}.{0,1}.{0,1} are treat as local addresses, + * because they are used in Debian for localhost. + */ if (address.isLoopback()) { if (address.family() == Poco::Net::AddressFamily::IPv4) @@ -102,23 +111,6 @@ bool isLoopback(const Poco::Net::IPAddress & address) } } - return false; -} - -bool isLocalAddress(const Poco::Net::IPAddress & address) -{ - /** 127.0.0.1 is treat as local address unconditionally. - * ::1 is also treat as local address unconditionally. - * - * 127.0.0.{2..255} are not treat as local addresses, because they are used in tests - * to emulate distributed queries across localhost. - * - * But 127.{0,1}.{0,1}.{0,1} are treat as local addresses, - * because they are used in Debian for localhost. 
- */ - if (isLoopback(address)) - return true; - NetworkInterfaces interfaces; return interfaces.hasAddress(address); } diff --git a/src/Common/isLocalAddress.h b/src/Common/isLocalAddress.h index 9cae0e0460f..42977775b09 100644 --- a/src/Common/isLocalAddress.h +++ b/src/Common/isLocalAddress.h @@ -25,7 +25,6 @@ namespace DB bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_port); bool isLocalAddress(const Poco::Net::SocketAddress & address); bool isLocalAddress(const Poco::Net::IPAddress & address); - bool isLoopback(const Poco::Net::IPAddress & address); /// Returns number of different bytes in hostnames, used for load balancing size_t getHostNameDifference(const std::string & local_hostname, const std::string & host); diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index b24fa6cdb5e..c5d719919c8 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -17,16 +17,31 @@ namespace ErrorCodes namespace { -bool isLocalhost(const std::string & hostname) +bool isLoopback(const std::string & hostname) { try { - return isLoopback(DNSResolver::instance().resolveHost(hostname)); + return DNSResolver::instance().resolveHost(hostname).isLoopback(); } catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); } + + return false; +} + +bool isLocalhost(const std::string & hostname) +{ + try + { + return isLocalAddress(DNSResolver::instance().resolveHost(hostname)); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + return false; } @@ -58,8 +73,9 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC std::unordered_map check_duplicated_hostnames; size_t total_servers = 0; - std::string local_hostname; + std::string loopback_hostname; std::string non_local_hostname; + size_t local_address_counter = 0; for (const auto & server_key : keys) { if (!startsWith(server_key, "server")) @@ -79,10 +95,19 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC hostname, port); } - if (isLocalhost(hostname)) - local_hostname = hostname; + if (isLoopback(hostname)) + { + loopback_hostname = hostname; + local_address_counter++; + } + else if (isLocalhost(hostname)) + { + local_address_counter++; + } else + { non_local_hostname = hostname; + } if (start_as_follower) result.servers_start_as_followers.insert(new_server_id); @@ -123,12 +148,22 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC if (result.servers_start_as_followers.size() == total_servers) throw Exception(ErrorCodes::RAFT_ERROR, "At least one of servers should be able to start as leader (without )"); - if (!local_hostname.empty() && !non_local_hostname.empty()) + if (!loopback_hostname.empty() && !non_local_hostname.empty()) { throw Exception( ErrorCodes::RAFT_ERROR, - "Mixing local and non-local hostnames ('{}' and '{}') in raft_configuration is not allowed. Different hosts can resolve it to themselves so it's not allowed.", - local_hostname, non_local_hostname); + "Mixing loopback and non-local hostnames ('{}' and '{}') in raft_configuration is not allowed. " + "Different hosts can resolve it to themselves so it's not allowed.", + loopback_hostname, non_local_hostname); + } + + if (!non_local_hostname.empty() && local_address_counter > 1) + { + throw Exception( + ErrorCodes::RAFT_ERROR, + "Local address specified more than once ({} times) and non-local hostnames also exists ('{}') in raft_configuration. 
" + "Such configuration is not allowed because single host can vote multiple times.", + local_address_counter, non_local_hostname); } return result; From 5db9018ae0341c101effbd6ff9e4cde4e357cab7 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 4 Mar 2022 13:26:56 +0100 Subject: [PATCH 39/87] Fix unitialized variable --- src/Coordination/KeeperStateMachine.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index a9b5fc85828..35e56ba1e30 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -41,6 +41,9 @@ namespace if (!buffer.eof()) readIntBinary(request_for_session.time, buffer); + else /// backward compatibility + request_for_session.time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + return request_for_session; } From fc572dcb4148a2c144c9de59107a0522881f37f0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 4 Mar 2022 23:32:38 +0100 Subject: [PATCH 40/87] Remove useless define --- src/Core/ProtocolDefines.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 93f44b02ce3..6ee491f3ab5 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -8,7 +8,6 @@ #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 #define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401 #define DBMS_MIN_REVISION_WITH_SERVER_LOGS 54406 -#define DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA 54415 /// Minimum revision with exactly the same set of aggregation methods and rules to select them. /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// (keys will be placed in different buckets and result will not be fully aggregated). 
From 1b9987ceb120666c3bfc8b09c030724c5d199bda Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Sat, 5 Mar 2022 14:22:56 +0800 Subject: [PATCH 41/87] fix mistake in check-style --- programs/local/LocalServer.cpp | 3 ++- src/Client/LocalConnection.cpp | 17 +++++++++++------ src/Client/LocalConnection.h | 9 ++++++--- src/Interpreters/ProfileEventsExt.cpp | 16 +++++++--------- src/Server/TCPHandler.cpp | 2 +- utils/check-style/check-style | 2 ++ 6 files changed, 29 insertions(+), 20 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index a2d36954165..eb562dfd9eb 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -411,7 +411,8 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config()); - connection = LocalConnection::createConnection(connection_parameters, global_context, need_render_progress, need_render_profile_events); + connection = LocalConnection::createConnection( + connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name); } diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index 02b437d7ce6..55505fce987 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -20,11 +20,12 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; } -LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_) +LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) : WithContext(context_) , session(getContext(), ClientInfo::Interface::LOCAL) , send_progress(send_progress_) , send_profile_events(send_profile_events_) + , server_display_name(server_display_name_) { /// Authenticate and create a context to execute queries. 
session.authenticate("default", "", Poco::Net::SocketAddress{}); @@ -63,7 +64,7 @@ void LocalConnection::updateProgress(const Progress & value) void LocalConnection::getProfileEvents(Block & block) { - ProfileEvents::getProfileEvents("local", state->profile_queue, block, last_sent_snapshots); + ProfileEvents::getProfileEvents(server_display_name, state->profile_queue, block, last_sent_snapshots); } void LocalConnection::sendQuery( @@ -85,14 +86,13 @@ void LocalConnection::sendQuery( if (!current_database.empty()) query_context->setCurrentDatabase(current_database); - query_scope_holder.reset(); - query_scope_holder = std::make_unique(query_context); state.reset(); state.emplace(); state->query_id = query_id; state->query = query; + state->query_scope_holder = std::make_unique(query_context); state->stage = QueryProcessingStage::Enum(stage); state->profile_queue = std::make_shared(std::numeric_limits::max()); CurrentThread::attachInternalProfileEventsQueue(state->profile_queue); @@ -483,9 +483,14 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const PartitionReadResponse throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented"); } -ServerConnectionPtr LocalConnection::createConnection(const ConnectionParameters &, ContextPtr current_context, bool send_progress, bool send_profile_events) +ServerConnectionPtr LocalConnection::createConnection( + const ConnectionParameters &, + ContextPtr current_context, + bool send_progress, + bool send_profile_events, + const String & server_display_name) { - return std::make_unique(current_context, send_progress, send_profile_events); + return std::make_unique(current_context, send_progress, send_profile_events, server_display_name); } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 7edb791a177..62e95cdfee6 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -54,13 +54,15 @@ struct LocalQueryState Stopwatch after_send_progress; Stopwatch after_send_profile_events; + std::unique_ptr query_scope_holder; }; class LocalConnection : public IServerConnection, WithContext { public: - explicit LocalConnection(ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false); + explicit LocalConnection( + ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false, const String & server_display_name_ = ""); ~LocalConnection() override; @@ -70,7 +72,8 @@ public: const ConnectionParameters & connection_parameters, ContextPtr current_context, bool send_progress = false, - bool send_profile_events = false); + bool send_profile_events = false, + const String & server_display_name = ""); void setDefaultDatabase(const String & database) override; @@ -146,6 +149,7 @@ private: bool send_progress; bool send_profile_events; + String server_display_name; String description = "clickhouse-local"; std::optional state; @@ -157,6 +161,5 @@ private: String current_database; ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; - std::unique_ptr query_scope_holder; }; } diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 6961d70529e..ea87d565854 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -79,16 +79,14 @@ static void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::Mutabl static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) { - { - size_t i = 0; - 
columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(Type::GAUGE); + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(Type::GAUGE); - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); - } + columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); + columns[i++]->insert(snapshot.memory_usage); } void getProfileEvents( diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 5be340386d8..f4592a8b2c9 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -859,7 +859,7 @@ void TCPHandler::sendProfileEvents() Block block; ProfileEvents::getProfileEvents(server_display_name, state.profile_queue, block, last_sent_snapshots); - if (!!block.rows()) + if (block.rows() != 0) { initProfileEventsBlockOutput(block); diff --git a/utils/check-style/check-style b/utils/check-style/check-style index d178778a410..6ebf53cb932 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -74,6 +74,8 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::Type ProfileEvents::TypeEnum ProfileEvents::dumpToMapColumn + ProfileEvents::getProfileEvents + ProfileEvents::ThreadIdToCountersSnapshot ProfileEvents::LOCAL_NAME ProfileEvents::CountersIncrement From 240b5e8199248c700df342179cf61fd4a24843b7 Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Sat, 5 Mar 2022 13:23:52 +0300 Subject: [PATCH 42/87] Update ym-dict-functions.md --- .../sql-reference/functions/ym-dict-functions.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index f947c81c7a9..1e6c9cbd0b4 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,11 +1,11 @@ --- toc_priority: 59 -toc_title: Yandex.Metrica Dictionaries +toc_title: Embedded Dictionaries --- -# Functions for Working with Yandex.Metrica Dictionaries {#functions-for-working-with-yandex-metrica-dictionaries} +# Functions for Working with Embedded Dictionaries -In order for the functions below to work, the server config must specify the paths and addresses for getting all the Yandex.Metrica dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can’t be loaded, an exception is thrown. +In order for the functions below to work, the server config must specify the paths and addresses for getting all the embedded dictionaries. The dictionaries are loaded at the first call of any of these functions. If the reference lists can’t be loaded, an exception is thrown. For information about creating reference lists, see the section “Dictionaries”. @@ -33,7 +33,7 @@ regionToCountry(RegionID, 'ua') – Uses the dictionary for the 'ua' key: /opt/g ### regionToCity(id\[, geobase\]) {#regiontocityid-geobase} -Accepts a UInt32 number – the region ID from the Yandex geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0. +Accepts a UInt32 number – the region ID from the geobase. 
If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0. ### regionToArea(id\[, geobase\]) {#regiontoareaid-geobase} @@ -117,7 +117,7 @@ regionToTopContinent(id[, geobase]) **Arguments** -- `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md). +- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. **Returned value** @@ -132,7 +132,7 @@ Type: `UInt32`. Gets the population for a region. The population can be recorded in files with the geobase. See the section “External dictionaries”. If the population is not recorded for the region, it returns 0. -In the Yandex geobase, the population might be recorded for child regions, but not for parent regions. +In the geobase, the population might be recorded for child regions, but not for parent regions. ### regionIn(lhs, rhs\[, geobase\]) {#regioninlhs-rhs-geobase} @@ -141,12 +141,12 @@ The relationship is reflexive – any region also belongs to itself. ### regionHierarchy(id\[, geobase\]) {#regionhierarchyid-geobase} -Accepts a UInt32 number – the region ID from the Yandex geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain. +Accepts a UInt32 number – the region ID from the geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain. Example: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`. ### regionToName(id\[, lang\]) {#regiontonameid-lang} -Accepts a UInt32 number – the region ID from the Yandex geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned. +Accepts a UInt32 number – the region ID from the geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ‘ru’ is used. If the language is not supported, an exception is thrown. Returns a string – the name of the region in the corresponding language. If the region with the specified ID does not exist, an empty string is returned. `ua` and `uk` both mean Ukrainian. 
From 8a12a4c214238c88f66e6f1651e4ea2d6ec8f4c2 Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 5 Mar 2022 16:17:08 +0000 Subject: [PATCH 43/87] Try to fix failed tests --- .../0_stateless/00135_duplicate_group_by_keys_segfault.sql | 2 ++ tests/queries/0_stateless/01926_order_by_desc_limit.sql | 2 ++ tests/queries/0_stateless/02015_async_inserts_stress_long.sh | 3 +++ 3 files changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql index 16356046a36..c54593056cf 100644 --- a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql +++ b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET max_rows_to_read = 1000000; SET read_overflow_mode = 'break'; SELECT concat(toString(number % 256 AS n), '') AS s, n, max(s) FROM system.numbers_mt GROUP BY s, n, n, n, n, n, n, n, n, n ORDER BY s, n; diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 7ea102e11e9..9ee7f4a6aff 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS order_by_desc; CREATE TABLE order_by_desc (u UInt32, s String) diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index f9a58818404..e773024bb12 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash +# Tags: no-random-settings + set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh + function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" From 722e0ea214ab50d32d5e2c4d586ecbae63fa9ad7 Mon Sep 17 00:00:00 2001 From: avogar Date: Sat, 5 Mar 2022 16:46:14 +0000 Subject: [PATCH 44/87] Fix clickhouse-test --- tests/clickhouse-test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 4c94e4d6493..121a283d0e4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -446,7 +446,7 @@ class TestCase: else: os.environ['CLICKHOUSE_URL_PARAMS'] = self.base_url_params + '&' + '&'.join(self.random_settings) - new_options = "--allow_repeated_settings --" + " --".join(self.random_settings) + new_options = " --allow_repeated_settings --" + " --".join(self.random_settings) os.environ['CLICKHOUSE_CLIENT_OPT'] = self.base_client_options + new_options + ' ' return client_options + new_options From 3ec6cd31280c0c01b3e0bea8cb54a53f21c1ecc3 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 6 Mar 2022 11:03:22 +0100 Subject: [PATCH 45/87] Update StorageFileLog.cpp --- src/Storages/FileLog/StorageFileLog.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b43f47eb6c5..700b35a5a48 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -108,6 +108,7 @@ void StorageFileLog::loadMetaFiles(bool attach) if (std::filesystem::exists(previous_path) && !metadata_path_exist) { std::filesystem::copy(previous_path, metadata_base_path, std::filesystem::copy_options::recursive); + std::filesystem::remove_all(previous_path); } /// Meta file may lost, log and create directory else if (!metadata_path_exist) From bc224dee3601ef22015b432d976eeea7d149e8dc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 6 Mar 2022 13:39:49 +0300 Subject: [PATCH 46/87] Do not hide exceptions during mutations system.mutations includes only the message, but not stacktrace, and it is not always obvious to understand the culprit w/o stacktrace. 
Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 8f254b4790e..e3fa07dd0c0 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -94,6 +94,7 @@ bool MutatePlainMergeTreeTask::executeStep() { storage.updateMutationEntriesErrors(future_part, false, getCurrentExceptionMessage(false)); write_part_log(ExecutionStatus::fromCurrentException()); + tryLogCurrentException(__PRETTY_FUNCTION__); return false; } } From 081f9caa040b793c6479bde1477a9380f6ad8455 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 6 Mar 2022 13:23:50 +0300 Subject: [PATCH 47/87] Avoid possible deadlock on server shutdown Here is an example for deadlock during shutting down DDLWorker: Server main thread: 6 ThreadFromGlobalPool::join () at ../src/Common/ThreadPool.h:217 7 DB::DDLWorker::shutdown () at ../src/Interpreters/DDLWorker.cpp:123 8 DB::DDLWorker::~DDLWorker () at ../src/Interpreters/DDLWorker.cpp:131 9 DB::DDLWorker::~DDLWorker () at ../src/Interpreters/DDLWorker.cpp:130 10 std::__1::default_delete::operator() () at ../contrib/libcxx/include/memory:1397 11 std::__1::unique_ptr<>::reset (this=0x7f7521d44fd0, __p=0x0) at ../contrib/libcxx/include/memory:1658 12 DB::ContextSharedPart::shutdown () at ../src/Interpreters/Context.cpp:380 From 9efc8a1d3852e13232e68735ad2fd8dd99e7dd65 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Mon, 7 Mar 2022 11:11:14 +0800 Subject: [PATCH 60/87] Fix min/max stat --- src/Coordination/KeeperConnectionStats.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Coordination/KeeperConnectionStats.cpp b/src/Coordination/KeeperConnectionStats.cpp index 26455de25f5..b4edfe45159 100644 --- a/src/Coordination/KeeperConnectionStats.cpp +++ b/src/Coordination/KeeperConnectionStats.cpp @@ -53,11 +53,11 @@ void KeeperConnectionStats::updateLatency(uint64_t latency_ms) total_latency.fetch_add(latency_ms, std::memory_order_relaxed); count.fetch_add(1, std::memory_order_relaxed); - if (latency_ms < min_latency.load(std::memory_order_relaxed)) - min_latency.store(latency_ms, std::memory_order_relaxed); + uint64_t prev_val = min_latency.load(std::memory_order_relaxed); + while (prev_val > latency_ms && !min_latency.compare_exchange_weak(prev_val, latency_ms, std::memory_order_relaxed)) {} - if (latency_ms > max_latency.load(std::memory_order_relaxed)) - max_latency.store(latency_ms, std::memory_order_relaxed); + prev_val = max_latency.load(std::memory_order_relaxed); + while (prev_val < latency_ms && !max_latency.compare_exchange_weak(prev_val, latency_ms, std::memory_order_relaxed)) {} } void KeeperConnectionStats::reset() From cfeedd2cb53d6e781d60d3673855fd8f7ea54d21 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 7 Mar 2022 12:28:31 +0800 Subject: [PATCH 61/87] fixed code style --- src/Storages/Hive/StorageHive.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index f3381b09958..90f339cb8ec 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -52,7 +52,7 @@ public: SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) override; NamesAndTypesList getVirtuals() const override; - + bool isColumnOriented() const override; 
protected: From 8ae5296ee81d17a6a02e91c3acd7892c099c9fca Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 7 Mar 2022 17:26:48 +0800 Subject: [PATCH 62/87] fixed compile errors --- src/Storages/Hive/StorageHive.cpp | 2 +- src/Storages/Hive/StorageHive.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 4db2b8b98d1..540089ecf62 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -548,7 +548,7 @@ bool StorageHive::isColumnOriented() const return format_name == "Parquet" || format_name == "ORC"; } -Block StorageHive::getActualColumnsToRead(Block sample_block, const Block & header_block, const NameSet & partition_columns) +Block StorageHive::getActualColumnsToRead(Block sample_block, const Block & header_block, const NameSet & partition_columns) const { if (!isColumnOriented()) return header_block; diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 90f339cb8ec..eccd04a6759 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -90,7 +90,7 @@ private: HiveFilePtr createHiveFileIfNeeded(const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_); - Block getActualColumnsToRead(Block sample_block, const Block & header_block, const NameSet & partition_columns); + Block getActualColumnsToRead(Block sample_block, const Block & header_block, const NameSet & partition_columns) const; String hive_metastore_url; From 5423c5a45cc42ebaddedee38986e1d11695fe4c8 Mon Sep 17 00:00:00 2001 From: 1lann Date: Mon, 7 Mar 2022 18:29:10 +0800 Subject: [PATCH 63/87] Fix typo of update_lag In external dictionary providers, the allowed keys for configuration seemed to have a typo of "update_lag" as "update_tag", preventing the use of "update_lag". This change fixes that. 
--- src/Dictionaries/ClickHouseDictionarySource.cpp | 2 +- src/Dictionaries/MySQLDictionarySource.cpp | 2 +- src/Dictionaries/PostgreSQLDictionarySource.cpp | 2 +- src/Storages/ExternalDataSourceConfiguration.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index deecc3c983e..5a18dcffb22 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", - "update_field", "update_tag", "invalidate_query", "query", "where", "name", "secure"}; + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "secure"}; namespace { diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 29d70f3a7c4..6578f91aa73 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -34,7 +34,7 @@ static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "invalidate_query", "priority", - "update_tag", "dont_check_update_time", + "update_lag", "dont_check_update_time", "query", "where", "name" /* name_collection */, "socket", "share_connection", "fail_on_connection_loss", "close_connection", "ssl_ca", "ssl_cert", "ssl_key", diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 6fdf486fdbf..511d6a7288e 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -30,7 +30,7 @@ static const UInt64 max_block_size = 8192; static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", - "update_field", "update_tag", "invalidate_query", "query", "where", "name", "priority"}; + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; namespace { diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index 2d4b05c51b5..5549a816a06 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -34,7 +34,7 @@ IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "replica", - "update_field", "update_tag", "invalidate_query", "query", + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "secure", "uri", "collection"}; From 202ac18e764c82497742bf5db1b9bae8801e8dc5 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 4 Mar 2022 15:50:21 +0000 Subject: [PATCH 64/87] Skip 01086_odbc_roundtrip for aarch, disable force_tests --- tests/ci/ci_config.py | 2 -- tests/queries/0_stateless/01086_odbc_roundtrip.sh | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 000d3d9a000..b45a4ce90c6 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -231,7 +231,6 @@ CI_CONFIG = { }, "Stateful tests (aarch64, actions)": { "required_build": "package_aarch64", - "force_tests": True, }, "Stateful tests (release, DatabaseOrdinary, actions)": { 
"required_build": "package_release", @@ -259,7 +258,6 @@ CI_CONFIG = { }, "Stateless tests (aarch64, actions)": { "required_build": "package_aarch64", - "force_tests": True, }, "Stateless tests (release, wide parts enabled, actions)": { "required_build": "package_release", diff --git a/tests/queries/0_stateless/01086_odbc_roundtrip.sh b/tests/queries/0_stateless/01086_odbc_roundtrip.sh index 705746032f8..20066c6b34c 100755 --- a/tests/queries/0_stateless/01086_odbc_roundtrip.sh +++ b/tests/queries/0_stateless/01086_odbc_roundtrip.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -# Tags: no-asan, no-msan, no-fasttest +# Tags: no-asan, no-msan, no-fasttest, no-cpu-aarch64 # Tag no-msan: can't pass because odbc libraries are not instrumented +# Tag no-cpu-aarch64: clickhouse-odbc is not setup for arm CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 688493373f0cb66bc3ebfd3ecdd33c8e8ea501b4 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 4 Mar 2022 10:36:37 +0000 Subject: [PATCH 65/87] Try to fix trim function --- src/Parsers/ExpressionElementParsers.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e00e0aba7b3..c51201750c5 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -442,9 +442,9 @@ namespace pattern_list_args->children = { std::make_shared("^["), to_remove, - std::make_shared("]*|["), + std::make_shared("]+|["), to_remove, - std::make_shared("]*$") + std::make_shared("]+$") }; func_name = "replaceRegexpAll"; } @@ -455,7 +455,7 @@ namespace pattern_list_args->children = { std::make_shared("^["), to_remove, - std::make_shared("]*") + std::make_shared("]+") }; } else @@ -464,7 +464,7 @@ namespace pattern_list_args->children = { std::make_shared("["), to_remove, - std::make_shared("]*$") + std::make_shared("]+$") }; } func_name = "replaceRegexpOne"; From e81bbfb5b538de09e66afb769f6d5a1238e474ef Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Mar 2022 12:13:37 +0100 Subject: [PATCH 66/87] Review fixes --- src/Coordination/KeeperStateManager.cpp | 40 ++++++++++++++++++------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index ca50d14bca6..f9bfea5e69a 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -45,12 +45,37 @@ bool isLocalhost(const std::string & hostname) return false; } +std::unordered_map getClientPorts(const Poco::Util::AbstractConfiguration & config) +{ + static const char * config_port_names[] = { + "keeper_server.tcp_port", + "keeper_server.tcp_port_secure", + "interserver_http_port", + "interserver_https_port", + "tcp_port", + "tcp_with_proxy_port", + "tcp_port_secure", + "mysql_port", + "postgresql_port", + "grpc_port", + "prometheus.port", + }; + + std::unordered_map ports; + for (const auto & config_port_name : config_port_names) + { + if (config.has(config_port_name)) + ports[config.getUInt64(config_port_name)] = config_port_name; + } + return ports; +} + } /// this function quite long because contains a lot of sanity checks in config: /// 1. No duplicate endpoints /// 2. No "localhost" or "127.0.0.1" or another local addresses mixed with normal addresses -/// 3. Raft internal port is equal to client port +/// 3. Raft internal port is not equal to any other port for client /// 4. No duplicate IDs /// 5. 
Our ID present in hostnames list KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersConfiguration(const Poco::Util::AbstractConfiguration & config, bool allow_without_us) const @@ -60,12 +85,7 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC Poco::Util::AbstractConfiguration::Keys keys; config.keys(config_prefix + ".raft_configuration", keys); - std::unordered_set client_ports; - if (config.has(config_prefix + ".tcp_port")) - client_ports.insert(config.getUInt64(config_prefix + ".tcp_port")); - - if (config.has(config_prefix + ".tcp_port_secure")) - client_ports.insert(config.getUInt64(config_prefix + ".tcp_port_secure")); + auto client_ports = getClientPorts(config); /// Sometimes (especially in cloud envs) users can provide incorrect /// configuration with duplicated raft ids or endpoints. We check them @@ -89,10 +109,10 @@ KeeperStateManager::KeeperConfigurationWrapper KeeperStateManager::parseServersC int32_t priority = config.getInt(full_prefix + ".priority", 1); bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false); - if (client_ports.contains(port)) + if (client_ports.count(port) != 0) { - throw Exception(ErrorCodes::RAFT_ERROR, "Raft config contains hostname '{}' with port '{}' which is equal to client port on current machine", - hostname, port); + throw Exception(ErrorCodes::RAFT_ERROR, "Raft configuration contains hostname '{}' with port '{}' which is equal to '{}' in server configuration", + hostname, port, client_ports[port]); } if (isLoopback(hostname)) From 20478e9088779799b522220d8e26c53c35d57cf3 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 4 Mar 2022 10:42:31 +0000 Subject: [PATCH 67/87] add testcase to 02100_replaceRegexpAll_bug --- tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference | 1 + tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql | 2 ++ 2 files changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference index 993dd9b1cde..4dff9ef38ef 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -9,3 +9,4 @@ 1 1 1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql index 32f7f63f6d0..66ccb044549 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -12,3 +12,5 @@ SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') x; SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; + +SELECT trim(BOTH '"' FROM '2') == '2' From b0f9cc4838e1e0af2eb779f80c6693aeea369ab3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Mon, 7 Mar 2022 12:20:55 +0100 Subject: [PATCH 68/87] Keep deprecated installation methods --- docs/_includes/install/deb_repo.sh | 11 +++++++++++ docs/_includes/install/rpm_repo.sh | 7 +++++++ docs/_includes/install/tgz_repo.sh | 19 +++++++++++++++++++ docs/en/getting-started/install.md | 24 ++++++++++++++++++++++++ docs/ja/getting-started/install.md | 24 ++++++++++++++++++++++++ docs/ru/getting-started/install.md | 24 ++++++++++++++++++++++++ docs/zh/getting-started/install.md | 24 ++++++++++++++++++++++++ 7 files changed, 133 insertions(+) create mode 100644 docs/_includes/install/deb_repo.sh create mode 100644 docs/_includes/install/rpm_repo.sh create mode 100644 docs/_includes/install/tgz_repo.sh diff --git a/docs/_includes/install/deb_repo.sh b/docs/_includes/install/deb_repo.sh new file mode 100644 index 00000000000..21106e9fc47 --- /dev/null +++ b/docs/_includes/install/deb_repo.sh @@ -0,0 +1,11 @@ +sudo apt-get install apt-transport-https ca-certificates dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client + +sudo service clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/rpm_repo.sh b/docs/_includes/install/rpm_repo.sh new file mode 100644 index 00000000000..e3fd1232047 --- /dev/null +++ b/docs/_includes/install/rpm_repo.sh @@ -0,0 +1,7 @@ +sudo yum install yum-utils +sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo +sudo yum install clickhouse-server clickhouse-client + +sudo /etc/init.d/clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/tgz_repo.sh b/docs/_includes/install/tgz_repo.sh new file mode 100644 index 00000000000..0994510755b --- /dev/null +++ b/docs/_includes/install/tgz_repo.sh @@ -0,0 +1,19 @@ +export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ + grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz + +tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz +sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz +sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-server-$LATEST_VERSION.tgz +sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +sudo /etc/init.d/clickhouse-server start + +tar -xzvf clickhouse-client-$LATEST_VERSION.tgz +sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index da6e225f745..da75a991b0b 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -27,6 +27,14 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun {% include 'install/deb.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing deb-packages</summary> +``` bash +{% include 'install/deb_repo.sh' %} +``` +</details>
+ You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable). @@ -52,6 +60,14 @@ First, you need to add the official repository: {% include 'install/rpm.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing rpm-packages</summary> +``` bash +{% include 'install/rpm_repo.sh' %} +``` +</details>
+ If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available. Then run these commands to install packages: @@ -73,6 +89,14 @@ After that downloaded archives should be unpacked and installed with installatio {% include 'install/tgz.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing tgz archives</summary> +``` bash +{% include 'install/tgz_repo.sh' %} +``` +</details>
+ For production environments, it’s recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. ### From Docker Image {#from-docker-image} diff --git a/docs/ja/getting-started/install.md b/docs/ja/getting-started/install.md index 575506c3c4b..10dd2d74f5d 100644 --- a/docs/ja/getting-started/install.md +++ b/docs/ja/getting-started/install.md @@ -28,6 +28,14 @@ Debian や Ubuntu 用にコンパイル済みの公式パッケージ `deb` を {% include 'install/deb.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing deb-packages</summary> +``` bash +{% include 'install/deb_repo.sh' %} +``` +</details>
+ 最新版を使いたい場合は、`stable`を`testing`に置き換えてください。(テスト環境ではこれを推奨します) 同様に、[こちら](https://packages.clickhouse.com/deb/pool/stable)からパッケージをダウンロードして、手動でインストールすることもできます。 @@ -49,6 +57,14 @@ CentOS、RedHat、その他すべてのrpmベースのLinuxディストリビュ {% include 'install/rpm.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing rpm-packages</summary> +``` bash +{% include 'install/rpm_repo.sh' %} +``` +</details>
+ 最新版を使いたい場合は `stable` を `testing` に置き換えてください。(テスト環境ではこれが推奨されています)。`prestable` もしばしば同様に利用できます。 そして、以下のコマンドを実行してパッケージをインストールします: @@ -69,6 +85,14 @@ sudo yum install clickhouse-server clickhouse-client {% include 'install/tgz.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing tgz archives</summary> +``` bash +{% include 'install/tgz_repo.sh' %} +``` +</details>
+ 本番環境では、最新の `stable` バージョンを使うことをお勧めします。GitHub のページ https://github.com/ClickHouse/ClickHouse/tags で 接尾辞 `-stable` となっているバージョン番号として確認できます。 ### Dockerイメージから {#from-docker-image} diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index 84f9bc576e9..64c5cac26df 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -27,6 +27,14 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su {% include 'install/deb.sh' %} ``` +
+<details markdown="1"> +<summary>Устаревший способ установки deb-пакетов</summary> +``` bash +{% include 'install/deb_repo.sh' %} +``` +</details>
+ Чтобы использовать различные [версии ClickHouse](../faq/operations/production.md) в зависимости от ваших потребностей, вы можете заменить `stable` на `lts` или `testing`. Также вы можете вручную скачать и установить пакеты из [репозитория](https://packages.clickhouse.com/deb/pool/stable). @@ -52,6 +60,14 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su {% include 'install/rpm.sh' %} ``` +
+<details markdown="1"> +<summary>Устаревший способ установки rpm-пакетов</summary> +``` bash +{% include 'install/rpm_repo.sh' %} +``` +</details>
+ Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`. Для, собственно, установки пакетов необходимо выполнить следующие команды: @@ -73,6 +89,14 @@ sudo yum install clickhouse-server clickhouse-client {% include 'install/tgz.sh' %} ``` +
+<details markdown="1"> +<summary>Устаревший способ установки из архивов tgz</summary> +``` bash +{% include 'install/tgz_repo.sh' %} +``` +</details>
+ Для production окружений рекомендуется использовать последнюю `stable`-версию. Её номер также можно найти на github с на вкладке https://github.com/ClickHouse/ClickHouse/tags c постфиксом `-stable`. ### Из Docker образа {#from-docker-image} diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index 6a966355fae..a6a8b05483a 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -27,6 +27,14 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not {% include 'install/deb.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing deb-packages</summary> +``` bash +{% include 'install/deb_repo.sh' %} +``` +</details>
+ 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。 你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/deb/pool/stable)。 @@ -48,6 +56,14 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not {% include 'install/rpm.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing rpm-packages</summary> +``` bash +{% include 'install/rpm_repo.sh' %} +``` +</details>
+ 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。 然后运行命令安装: @@ -70,6 +86,14 @@ sudo yum install clickhouse-server clickhouse-client {% include 'install/tgz.sh' %} ``` +
+<details markdown="1"> +<summary>Deprecated Method for installing tgz archives</summary> +``` bash +{% include 'install/tgz_repo.sh' %} +``` +</details>
+ 对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。 ### `Docker`安装包 {#from-docker-image} From aae13ed9123486ce574e634af08d32111540a9e6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 7 Mar 2022 15:18:57 +0100 Subject: [PATCH 69/87] Supress move partition long for storage S3 --- tests/queries/0_stateless/01154_move_partition_long.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 6b0b0773cb6..7cefac28e22 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-s3-storage +# FIXME: s3 storage should work OK, it +# reproduces bug which exists not only in S3 version. CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 84e22fb32bc6e4f54444d4356413865276712d57 Mon Sep 17 00:00:00 2001 From: tavplubix Date: Mon, 7 Mar 2022 18:59:00 +0300 Subject: [PATCH 70/87] Update DiskLocal.cpp --- src/Disks/DiskLocal.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 57bfaf405e0..e49e9cf6726 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -325,7 +326,7 @@ DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) void DiskLocal::moveFile(const String & from_path, const String & to_path) { - fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path); + renameNoReplace(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path); } void DiskLocal::replaceFile(const String & from_path, const String & to_path) From fe4534d4646e51e40ac04fec83e4a321c4ce19ea Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 7 Mar 2022 21:51:34 +0800 Subject: [PATCH 71/87] Get rid of duplicate query planing. --- src/Interpreters/InterpreterSelectQuery.cpp | 20 ++++++++----------- .../InterpreterSelectWithUnionQuery.cpp | 4 +++- src/Interpreters/JoinedTables.cpp | 4 +++- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 97a06842d97..f2fc17fbf9a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1902,20 +1902,16 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc else if (interpreter_subquery) { /// Subquery. - /// If we need less number of columns that subquery have - update the interpreter. 
- if (required_columns.size() < source_header.columns()) - { - ASTPtr subquery = extractTableExpression(query, 0); - if (!subquery) - throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); + ASTPtr subquery = extractTableExpression(query, 0); + if (!subquery) + throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); - interpreter_subquery = std::make_unique( - subquery, getSubqueryContext(context), - options.copy().subquery().noModify(), required_columns); + interpreter_subquery = std::make_unique( + subquery, getSubqueryContext(context), + options.copy().subquery().noModify(), required_columns); - if (query_analyzer->hasAggregation()) - interpreter_subquery->ignoreWithTotals(); - } + if (query_analyzer->hasAggregation()) + interpreter_subquery->ignoreWithTotals(); interpreter_subquery->buildQueryPlan(query_plan); query_plan.addInterpreterContext(context); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 723db59f04b..130b3aae58d 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -208,8 +208,10 @@ Block InterpreterSelectWithUnionQuery::getCurrentChildResultHeader(const ASTPtr if (ast_ptr_->as()) return InterpreterSelectWithUnionQuery(ast_ptr_, context, options.copy().analyze().noModify(), required_result_column_names) .getSampleBlock(); - else + else if (ast_ptr_->as()) return InterpreterSelectQuery(ast_ptr_, context, options.copy().analyze().noModify()).getSampleBlock(); + else + return InterpreterSelectIntersectExceptQuery(ast_ptr_, context, options.copy().analyze().noModify()).getSampleBlock(); } std::unique_ptr diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 3aae3982758..482a813bfef 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -183,7 +183,9 @@ std::unique_ptr JoinedTables::makeLeftTableSubq { if (!isLeftTableSubquery()) return {}; - return std::make_unique(left_table_expression, context, select_options); + + /// Only build dry_run interpreter during analysis. We will reconstruct the subquery interpreter during plan building. + return std::make_unique(left_table_expression, context, select_options.copy().analyze()); } StoragePtr JoinedTables::getLeftTableStorage() From a8cfc2458a8a5db2a946096fad0bc2299cf5eb10 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Tue, 8 Mar 2022 11:55:15 +0800 Subject: [PATCH 72/87] update codes --- src/Storages/Hive/StorageHive.cpp | 30 +++++++++++++++++------------- src/Storages/Hive/StorageHive.h | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 540089ecf62..a11488cf3cf 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -115,12 +115,12 @@ public: , format(std::move(format_)) , compression_method(compression_method_) , max_block_size(max_block_size_) - , sample_block(sample_block_) - , to_read_block(sample_block) + , sample_block(std::move(sample_block_)) , columns_description(getColumnsDescription(sample_block, source_info)) , text_input_field_names(text_input_field_names_) , format_settings(getFormatSettings(getContext())) { + to_read_block = sample_block; /// Initialize to_read_block, which is used to read data from HDFS. for (const auto & name_type : source_info->partition_name_types) { @@ -206,12 +206,16 @@ public: /// Enrich with partition columns. 
auto types = source_info->partition_name_types.getTypes(); + auto names = source_info->partition_name_types.getNames(); + auto fields = source_info->hive_files[current_idx]->getPartitionValues(); for (size_t i = 0; i < types.size(); ++i) { - if (!sample_block.has(source_info->partition_name_types.getNames()[i])) + // Only add the required partition columns. partition columns are not read from readbuffer + // the column must be in sample_block, otherwise sample_block.getPositionByName(names[i]) will throw an exception + if (!sample_block.has(names[i])) continue; - auto column = types[i]->createColumnConst(num_rows, source_info->hive_files[current_idx]->getPartitionValues()[i]); - auto previous_idx = sample_block.getPositionByName(source_info->partition_name_types.getNames()[i]); + auto column = types[i]->createColumnConst(num_rows, fields[i]); + auto previous_idx = sample_block.getPositionByName(names[i]); columns.insert(columns.begin() + previous_idx, column); } @@ -548,28 +552,28 @@ bool StorageHive::isColumnOriented() const return format_name == "Parquet" || format_name == "ORC"; } -Block StorageHive::getActualColumnsToRead(Block sample_block, const Block & header_block, const NameSet & partition_columns) const +void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const { if (!isColumnOriented()) - return header_block; - Block result_block = sample_block; + sample_block = header_block; + UInt32 erased_columns = 0; for (const auto & column : partition_columns) { - sample_block.erase(column); + if (sample_block.has(column)) + erased_columns++; } - if (!sample_block.columns()) + if (erased_columns == sample_block.columns()) { for (size_t i = 0; i < header_block.columns(); ++i) { const auto & col = header_block.getByPosition(i); if (!partition_columns.count(col.name)) { - result_block.insert(col); + sample_block.insert(col); break; } } } - return result_block; } Pipe StorageHive::read( const Names & column_names, @@ -646,7 +650,7 @@ Pipe StorageHive::read( sources_info->need_file_column = true; } - sample_block = getActualColumnsToRead(sample_block, header_block, NameSet{partition_names.begin(), partition_names.end()}); + getActualColumnsToRead(sample_block, header_block, NameSet{partition_names.begin(), partition_names.end()}); if (num_streams > sources_info->hive_files.size()) num_streams = sources_info->hive_files.size(); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index eccd04a6759..71d17750190 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -90,7 +90,7 @@ private: HiveFilePtr createHiveFileIfNeeded(const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_); - Block getActualColumnsToRead(Block sample_block, const Block & header_block, const NameSet & partition_columns) const; + void getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const; String hive_metastore_url; From caffc144b5a1c31e93c3db7cae0eb22494217b45 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 24 Feb 2022 15:23:26 +0300 Subject: [PATCH 73/87] Fix possible "Part directory doesn't exist" during INSERT In #33291 final part commit had been defered, and now it can take significantly more time, that may lead to "Part directory doesn't exist" error during INSERT: 2022.02.21 18:18:06.979881 [ 11329 ] {insert} executeQuery: (from 127.1:24572, user: default) INSERT INTO db.table 
(...) VALUES 2022.02.21 20:58:03.933593 [ 11329 ] {insert} db.table: Renaming temporary part tmp_insert_20220214_18044_18044_0 to 20220214_270654_270654_0. 2022.02.21 21:16:50.961917 [ 11329 ] {insert} db.table: Renaming temporary part tmp_insert_20220214_18197_18197_0 to 20220214_270689_270689_0. ... 2022.02.22 21:16:57.632221 [ 64878 ] {} db.table: Removing temporary directory /clickhouse/data/db/table/tmp_insert_20220214_18232_18232_0/ ... 2022.02.23 12:23:56.277480 [ 11329 ] {insert} db.table: Renaming temporary part tmp_insert_20220214_18232_18232_0 to 20220214_273459_273459_0. 2022.02.23 12:23:56.299218 [ 11329 ] {insert} executeQuery: Code: 107. DB::Exception: Part directory /clickhouse/data/db/table/tmp_insert_20220214_18232_18232_0/ doesn't exist. Most likely it is a logical error. (FILE_DOESNT_EXIST) (version 22.2.1.1) (from 127.1:24572) (in query: INSERT INTO db.table (...) VALUES), Stack trace (when copying this message, always include the lines below): Follow-up for: #28760 Refs: #33291 Signed-off-by: Azat Khuzhin --- src/Storages/MergeTree/MergeTask.cpp | 8 ++--- src/Storages/MergeTree/MergeTreeData.cpp | 6 ++-- src/Storages/MergeTree/MergeTreeData.h | 28 ++++++++-------- .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 ---- .../MergeTree/MergeTreeDataMergerMutator.h | 20 ----------- .../ReplicatedMergeTreeCleanupThread.cpp | 2 +- src/Storages/MergeTree/TemporaryParts.cpp | 24 ++++++++++++++ src/Storages/MergeTree/TemporaryParts.h | 33 +++++++++++++++++++ src/Storages/StorageMergeTree.cpp | 4 +-- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 10 files changed, 81 insertions(+), 52 deletions(-) create mode 100644 src/Storages/MergeTree/TemporaryParts.cpp create mode 100644 src/Storages/MergeTree/TemporaryParts.h diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 89fb27cc89c..8b5c2e0dc6e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -126,13 +126,9 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (ctx->disk->exists(local_new_part_tmp_path)) throw Exception("Directory " + fullPath(ctx->disk, local_new_part_tmp_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); - { - std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); - global_ctx->mutator->tmp_parts.emplace(local_tmp_part_basename); - } + global_ctx->data->temporary_parts.add(local_tmp_part_basename); SCOPE_EXIT( - std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); - global_ctx->mutator->tmp_parts.erase(local_tmp_part_basename); + global_ctx->data->temporary_parts.remove(local_tmp_part_basename); ); global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8ea9d0a31d0..a15da7578e8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1386,7 +1386,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -size_t MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds) +size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds) { /// If the method is already called from another thread, then we don't need to do anything. 
std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -1418,9 +1418,9 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMuta { if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) { - if (merger_mutator.hasTemporaryPart(basename)) + if (temporary_parts.contains(basename)) { - LOG_WARNING(log, "{} is an active destination for one of merge/mutation (consider increasing temporary_directories_lifetime setting)", full_path); + LOG_WARNING(log, "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path); continue; } else diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1a04b2a389b..1e7f127c85b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -3,30 +3,31 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include #include #include +#include +#include +#include +#include +#include +#include #include #include -#include #include #include #include -#include -#include +#include +#include +#include +#include #include #include -#include +#include +#include #include @@ -566,7 +567,7 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. - size_t clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds); + size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds); size_t clearEmptyParts(); @@ -906,7 +907,6 @@ public: mutable std::mutex currently_submerging_emerging_mutex; protected: - friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; friend struct ReplicatedMergeTreeTableMetadata; @@ -1200,6 +1200,8 @@ private: /// Create zero-copy exclusive lock for part and disk. Useful for coordination of /// distributed operations which can lead to data duplication. Implemented only in ReplicatedMergeTree. virtual std::optional tryCreateZeroCopyExclusiveLock(const String &, const DiskPtr &) { return std::nullopt; } + + TemporaryParts temporary_parts; }; /// RAII struct to record big parts that are submerging or emerging. diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2f097b69fc4..a6cda0016a8 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -782,10 +782,4 @@ ExecuteTTLType MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadat } -bool MergeTreeDataMergerMutator::hasTemporaryPart(const std::string & basename) const -{ - std::lock_guard lock(tmp_parts_lock); - return tmp_parts.contains(basename); -} - } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 82cad873dce..ae09e2c916c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -192,26 +192,6 @@ private: ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition; /// Performing TTL merges independently for each partition guarantees that /// there is only a limited number of TTL merges and no partition stores data, that is too stale - -public: - /// Returns true if passed part name is active. 
- /// (is the destination for one of active mutation/merge). - /// - /// NOTE: that it accept basename (i.e. dirname), not the path, - /// since later requires canonical form. - bool hasTemporaryPart(const std::string & basename) const; - -private: - /// Set of active temporary paths that is used as the destination. - /// List of such paths is required to avoid trying to remove them during cleanup. - /// - /// NOTE: It is pretty short, so use STL is fine. - std::unordered_set tmp_parts; - /// Lock for "tmp_parts". - /// - /// NOTE: mutable is required to mark hasTemporaryPath() const - mutable std::mutex tmp_parts_lock; - }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 26bfd951d3d..3b6c727cd02 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -64,7 +64,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() /// Both use relative_data_path which changes during rename, so we /// do it under share lock storage.clearOldWriteAheadLogs(); - storage.clearOldTemporaryDirectories(storage.merger_mutator, storage.getSettings()->temporary_directories_lifetime.totalSeconds()); + storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/MergeTree/TemporaryParts.cpp b/src/Storages/MergeTree/TemporaryParts.cpp new file mode 100644 index 00000000000..4239c8232e5 --- /dev/null +++ b/src/Storages/MergeTree/TemporaryParts.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +bool TemporaryParts::contains(const std::string & basename) const +{ + std::lock_guard lock(mutex); + return parts.contains(basename); +} + +void TemporaryParts::add(std::string basename) +{ + std::lock_guard lock(mutex); + parts.emplace(std::move(basename)); +} + +void TemporaryParts::remove(const std::string & basename) +{ + std::lock_guard lock(mutex); + parts.erase(basename); +} + +} diff --git a/src/Storages/MergeTree/TemporaryParts.h b/src/Storages/MergeTree/TemporaryParts.h new file mode 100644 index 00000000000..bc9d270856f --- /dev/null +++ b/src/Storages/MergeTree/TemporaryParts.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +/// Manages set of active temporary paths that should not be cleaned by background thread. +class TemporaryParts : private boost::noncopyable +{ +private: + /// To add const qualifier for contains() + mutable std::mutex mutex; + + /// NOTE: It is pretty short, so use STL is fine. + std::unordered_set parts; + +public: + /// Returns true if passed part name is active. + /// (is the destination for one of active mutation/merge). + /// + /// NOTE: that it accept basename (i.e. dirname), not the path, + /// since later requires canonical form. 
+ bool contains(const std::string & basename) const; + + void add(std::string basename); + void remove(const std::string & basename); +}; + +} diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 2db93def004..a05ed04a66c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -108,7 +108,7 @@ void StorageMergeTree::startup() /// Temporary directories contain incomplete results of merges (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately - clearOldTemporaryDirectories(merger_mutator, 0); + clearOldTemporaryDirectories(0); /// NOTE background task will also do the above cleanups periodically. time_after_previous_cleanup_parts.restart(); @@ -1062,7 +1062,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign assignee.scheduleCommonTask(ExecutableLambdaAdapter::create( [this, share_lock] () { - return clearOldTemporaryDirectories(merger_mutator, getSettings()->temporary_directories_lifetime.totalSeconds()); + return clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); scheduled = true; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ab42396f8da..82bddddb32d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -451,7 +451,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. - clearOldTemporaryDirectories(merger_mutator, 0); + clearOldTemporaryDirectories(0); clearOldWriteAheadLogs(); } From 6499fc2c455289ec9e74689dd20d4ec0e8da8ab1 Mon Sep 17 00:00:00 2001 From: cnmade Date: Tue, 8 Mar 2022 17:03:46 +0800 Subject: [PATCH 74/87] Translate zh/sql-reference/statements/alter/settings-profile: rename old file --- .../alter/{settings-profile.md => settings-profile.md.bak} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/zh/sql-reference/statements/alter/{settings-profile.md => settings-profile.md.bak} (100%) diff --git a/docs/zh/sql-reference/statements/alter/settings-profile.md b/docs/zh/sql-reference/statements/alter/settings-profile.md.bak similarity index 100% rename from docs/zh/sql-reference/statements/alter/settings-profile.md rename to docs/zh/sql-reference/statements/alter/settings-profile.md.bak From 80a8e4aa10c8a69901b31dc0eb93ad25953fe281 Mon Sep 17 00:00:00 2001 From: cnmade Date: Tue, 8 Mar 2022 17:10:54 +0800 Subject: [PATCH 75/87] Translate zh/sql-reference/statements/alter/settings-profile: reimport file --- .../statements/alter/settings-profile.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 docs/zh/sql-reference/statements/alter/settings-profile.md diff --git a/docs/zh/sql-reference/statements/alter/settings-profile.md b/docs/zh/sql-reference/statements/alter/settings-profile.md new file mode 100644 index 00000000000..57d12142c48 --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/settings-profile.md @@ -0,0 +1,16 @@ +--- +toc_priority: 48 +toc_title: SETTINGS PROFILE +--- + +## ALTER SETTINGS PROFILE {#alter-settings-profile-statement} + +Changes settings profiles. 
+ +Syntax: + +``` sql +ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] + [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] +``` From 0d668e4b15caf9175ec809158f95f738c60d76fd Mon Sep 17 00:00:00 2001 From: cnmade Date: Tue, 8 Mar 2022 17:13:36 +0800 Subject: [PATCH 76/87] Translate zh/sql-reference/statements/alter/settings-profile: translate to zh --- .../zh/sql-reference/statements/alter/settings-profile.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/zh/sql-reference/statements/alter/settings-profile.md b/docs/zh/sql-reference/statements/alter/settings-profile.md index 57d12142c48..045b2461e8c 100644 --- a/docs/zh/sql-reference/statements/alter/settings-profile.md +++ b/docs/zh/sql-reference/statements/alter/settings-profile.md @@ -1,13 +1,13 @@ --- toc_priority: 48 -toc_title: SETTINGS PROFILE +toc_title: 配置文件设置 --- -## ALTER SETTINGS PROFILE {#alter-settings-profile-statement} +## 更改配置文件设置 {#alter-settings-profile-statement} -Changes settings profiles. +更改配置文件设置。 -Syntax: +语法: ``` sql ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] From e0ab2c7ca2a2dc235f6f1048d1cc5dccdb50daba Mon Sep 17 00:00:00 2001 From: cnmade Date: Tue, 8 Mar 2022 17:14:06 +0800 Subject: [PATCH 77/87] Translate zh/sql-reference/statements/alter/settings-profile: remove old file --- docs/zh/sql-reference/statements/alter/settings-profile.md.bak | 1 - 1 file changed, 1 deletion(-) delete mode 120000 docs/zh/sql-reference/statements/alter/settings-profile.md.bak diff --git a/docs/zh/sql-reference/statements/alter/settings-profile.md.bak b/docs/zh/sql-reference/statements/alter/settings-profile.md.bak deleted file mode 120000 index 0e71ac4e831..00000000000 --- a/docs/zh/sql-reference/statements/alter/settings-profile.md.bak +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/settings-profile.md \ No newline at end of file From c4b634285363093fb71a7e57fcd3273a0d52d91d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 8 Mar 2022 17:24:39 +0300 Subject: [PATCH 78/87] Improvements for `parallel_distributed_insert_select` (and related) (#34728) * Add a warning if parallel_distributed_insert_select was ignored Signed-off-by: Azat Khuzhin * Respect max_distributed_depth for parallel_distributed_insert_select Signed-off-by: Azat Khuzhin * Print warning for non applied parallel_distributed_insert_select only for initial query Signed-off-by: Azat Khuzhin * Remove Cluster::getHashOfAddresses() Signed-off-by: Azat Khuzhin * Forbid parallel_distributed_insert_select for remote()/cluster() with different addresses Before it uses empty cluster name (getClusterName()) which is not correct, compare all addresses instead. Signed-off-by: Azat Khuzhin * Fix max_distributed_depth check max_distributed_depth=1 must mean not more then one distributed query, not two, since max_distributed_depth=0 means no limit, and distribute_depth is 0 for the first query. 
Signed-off-by: Azat Khuzhin * Fix INSERT INTO remote()/cluster() with parallel_distributed_insert_select Signed-off-by: Azat Khuzhin * Add a test for parallel_distributed_insert_select with cluster()/remote() Signed-off-by: Azat Khuzhin * Return instead of empty cluster name in Distributed engine Signed-off-by: Azat Khuzhin * Make user with sharding_key and w/o in remote()/cluster() identical Before with sharding_key the user was "default", while w/o it it was empty. Signed-off-by: Azat Khuzhin --- src/Interpreters/Cluster.h | 2 -- .../ClusterProxy/executeQuery.cpp | 2 +- src/Storages/Distributed/DistributedSink.cpp | 2 +- .../ExternalDataSourceConfiguration.h | 2 +- src/Storages/StorageDistributed.cpp | 33 ++++++++++++++++-- src/Storages/StorageDistributed.h | 3 +- ...istributed_insert_select_cluster.reference | 27 +++++++++++++++ ...llel_distributed_insert_select_cluster.sql | 34 +++++++++++++++++++ 8 files changed, 95 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference create mode 100644 tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 3773dadaf13..248d212ebf0 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -207,7 +207,6 @@ public: using ShardsInfo = std::vector; - String getHashOfAddresses() const { return hash_of_addresses; } const ShardsInfo & getShardsInfo() const { return shards_info; } const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; } @@ -263,7 +262,6 @@ private: /// Inter-server secret String secret; - String hash_of_addresses; /// Description of the cluster shards. ShardsInfo shards_info; /// Any remote shard. 
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 0db07267231..884b8445732 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -116,7 +116,7 @@ void executeQuery( const Settings & settings = context->getSettingsRef(); - if (settings.max_distributed_depth && context->getClientInfo().distributed_depth > settings.max_distributed_depth) + if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); std::vector plans; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 9951fb436b5..aa703bcbb89 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -126,7 +126,7 @@ DistributedSink::DistributedSink( , log(&Poco::Logger::get("DistributedBlockOutputStream")) { const auto & settings = context->getSettingsRef(); - if (settings.max_distributed_depth && context->getClientInfo().distributed_depth > settings.max_distributed_depth) + if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); context->getClientInfo().distributed_depth += 1; random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 1e08b088b1d..cc3e136ba50 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -16,7 +16,7 @@ struct ExternalDataSourceConfiguration { String host; UInt16 port = 0; - String username; + String username = "default"; String password; String database; String table; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index da648aa4e5c..fcbf22bbd33 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -118,6 +118,7 @@ namespace ErrorCodes extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_LARGE_DISTRIBUTED_DEPTH; } namespace ActionLocks @@ -705,6 +706,9 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) { const Settings & settings = local_context->getSettingsRef(); + if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) + throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + std::shared_ptr storage_src; auto & select = query.select->as(); auto new_query = std::dynamic_pointer_cast(query.clone()); @@ -733,14 +737,34 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer } } - if (!storage_src || storage_src->getClusterName() != getClusterName()) + const Cluster::AddressesWithFailover & src_addresses = storage_src ? 
storage_src->getCluster()->getShardsAddresses() : Cluster::AddressesWithFailover{}; + const Cluster::AddressesWithFailover & dst_addresses = getCluster()->getShardsAddresses(); + /// Compare addresses instead of cluster name, to handle remote()/cluster(). + /// (since for remote()/cluster() the getClusterName() is empty string) + if (src_addresses != dst_addresses) { + /// The warning should be produced only for root queries, + /// since in case of parallel_distributed_insert_select=1, + /// it will produce warning for the rewritten insert, + /// since destination table is still Distributed there. + if (local_context->getClientInfo().distributed_depth == 0) + { + LOG_WARNING(log, + "Parallel distributed INSERT SELECT is not possible " + "(source cluster={} ({} addresses), destination cluster={} ({} addresses))", + storage_src ? storage_src->getClusterName() : "", + src_addresses.size(), + getClusterName(), + dst_addresses.size()); + } return nullptr; } if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL) { new_query->table_id = StorageID(getRemoteDatabaseName(), getRemoteTableName()); + /// Reset table function for INSERT INTO remote()/cluster() + new_query->table_function.reset(); } const auto & cluster = getCluster(); @@ -757,12 +781,15 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer new_query_str = buf.str(); } + ContextMutablePtr query_context = Context::createCopy(local_context); + ++query_context->getClientInfo().distributed_depth; + for (size_t shard_index : collections::range(0, shards_info.size())) { const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, local_context); + InterpreterInsertQuery interpreter(new_query, query_context); pipelines.emplace_back(std::make_unique()); pipelines.back()->init(interpreter.execute().pipeline); } @@ -776,7 +803,7 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer /// INSERT SELECT query returns empty block auto remote_query_executor - = std::make_shared(shard_info.pool, std::move(connections), new_query_str, Block{}, local_context); + = std::make_shared(shard_info.pool, std::move(connections), new_query_str, Block{}, query_context); pipelines.emplace_back(std::make_unique()); pipelines.back()->init(Pipe(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote))); pipelines.back()->setSinks([](const Block & header, QueryPipelineBuilder::StreamType) -> ProcessorPtr diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index e47e0fddd6c..45b1cd640ee 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -114,8 +114,6 @@ public: /// Used by InterpreterInsertQuery std::string getRemoteDatabaseName() const { return remote_database; } std::string getRemoteTableName() const { return remote_table; } - /// Returns empty string if tables is used by TableFunctionRemote - std::string getClusterName() const { return cluster_name; } ClusterPtr getCluster() const; /// Used by InterpreterSystemQuery @@ -201,6 +199,7 @@ private: std::optional getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const; size_t getRandomShardIndex(const Cluster::ShardsInfo & shards); + std::string getClusterName() const { return cluster_name.empty() ? 
"" : cluster_name; } const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; } diff --git a/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference new file mode 100644 index 00000000000..05fbb680c65 --- /dev/null +++ b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference @@ -0,0 +1,27 @@ +-- { echoOn } +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +select * from dst_02224; +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=2; +select * from dst_02224; +1 +1 +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; +1 +1 +truncate table dst_02224; +insert into function remote('127.{1,2}', currentDatabase(), dst_02224, key) +select * from remote('127.{1,2}', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; +1 +1 diff --git a/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql new file mode 100644 index 00000000000..023f220e930 --- /dev/null +++ b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql @@ -0,0 +1,34 @@ +drop table if exists dst_02224; +drop table if exists src_02224; +create table dst_02224 (key Int) engine=Memory(); +create table src_02224 (key Int) engine=Memory(); +insert into src_02224 values (1); + +-- { echoOn } +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +select * from dst_02224; + +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=2; +select * from dst_02224; + +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; + +truncate table dst_02224; +insert into function remote('127.{1,2}', currentDatabase(), dst_02224, key) +select * from remote('127.{1,2}', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; +-- { echoOff } + 
+drop table src_02224; +drop table dst_02224; From 52ed751d58d228325d17b15961a3f3ea7c2d6ee8 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 8 Mar 2022 16:29:42 +0100 Subject: [PATCH 79/87] Fix installation documentation typo --- docs/_includes/install/deb.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh index 9dceef4c245..0daf12a132f 100644 --- a/docs/_includes/install/deb.sh +++ b/docs/_includes/install/deb.sh @@ -1,11 +1,11 @@ -sudo apt-get install apt-transport-https ca-certificates dirmngr +sudo apt-get install -y apt-transport-https ca-certificates dirmngr sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 -echo "deb https://packages.clickhouse.com/deb stable main/" | sudo tee \ +echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update sudo apt-get install -y clickhouse-server clickhouse-client sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. +clickhouse-client # or "clickhouse-client --password" if you've set up a password. From a871036361ea8e57660ecd88f1da5dea29b5ebf4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 8 Mar 2022 18:42:29 +0300 Subject: [PATCH 80/87] Fix `parallel_reading_from_replicas` with `clickhouse-bechmark` (#34751) * Use INITIAL_QUERY for clickhouse-benchmark Signed-off-by: Azat Khuzhin * Fix parallel_reading_from_replicas with clickhouse-bechmark Before it produces the following error: $ clickhouse-benchmark --stacktrace -i1 --query "select * from remote('127.1', default.data_mt) limit 10" --allow_experimental_parallel_reading_from_replicas=1 --max_parallel_replicas=3 Loaded 1 queries. Logical error: 'Coordinator for parallel reading from replicas is not initialized'. Aborted (core dumped) Since it uses the same code, i.e RemoteQueryExecutor -> MultiplexedConnections, which enables coordinator if it was requested from settings, but it should be done only for non-initial queries, i.e. when server send connection to another server. Signed-off-by: Azat Khuzhin * Fix 02226_parallel_reading_from_replicas_benchmark for older shellcheck By shellcheck 0.8 does not complains, while on CI shellcheck 0.7.0 and it does complains [1]: In 02226_parallel_reading_from_replicas_benchmark.sh line 17: --allow_experimental_parallel_reading_from_replicas=1 ^-- SC2191: The = here is literal. To assign by index, use ( [index]=value ) with no spaces. To keep as literal, quote it. 
Did you mean: "--allow_experimental_parallel_reading_from_replicas=1" [1]: https://s3.amazonaws.com/clickhouse-test-reports/34751/d883af711822faf294c876b017cbf745b1cda1b3/style_check__actions_/shellcheck_output.txt Signed-off-by: Azat Khuzhin --- programs/benchmark/Benchmark.cpp | 2 ++ src/Client/MultiplexedConnections.cpp | 7 ++++- src/QueryPipeline/RemoteQueryExecutor.cpp | 10 ++----- src/QueryPipeline/RemoteQueryExecutor.h | 8 ++++- ..._reading_from_replicas_benchmark.reference | 0 ...arallel_reading_from_replicas_benchmark.sh | 29 +++++++++++++++++++ 6 files changed, 46 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.reference create mode 100755 tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 35ffb97b8e2..60e5ca92f77 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -435,6 +435,8 @@ private: Progress progress; executor.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); + executor.sendQuery(ClientInfo::QueryKind::INITIAL_QUERY); + ProfileInfo info; while (Block block = executor.read()) info.update(block); diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index d1873ac038d..31fbc609bdc 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -133,7 +133,12 @@ void MultiplexedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - if (settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas) + bool parallel_reading_from_replicas = settings.max_parallel_replicas > 1 + && settings.allow_experimental_parallel_reading_from_replicas + /// To avoid trying to coordinate with clickhouse-benchmark, + /// since it uses the same code. + && client_info.query_kind != ClientInfo::QueryKind::INITIAL_QUERY; + if (parallel_reading_from_replicas) { client_info.collaborate_with_initiator = true; client_info.count_participating_replicas = replica_info.all_replicas_count; diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 142e56ceb25..d1275444b84 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -210,7 +210,7 @@ static Block adaptBlockStructure(const Block & block, const Block & header) return res; } -void RemoteQueryExecutor::sendQuery() +void RemoteQueryExecutor::sendQuery(ClientInfo::QueryKind query_kind) { if (sent_query) return; @@ -237,13 +237,7 @@ void RemoteQueryExecutor::sendQuery() auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); ClientInfo modified_client_info = context->getClientInfo(); - modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; - /// Set initial_query_id to query_id for the clickhouse-benchmark. 
-    ///
-    /// (since first query of clickhouse-benchmark will be issued as SECONDARY_QUERY,
-    /// due to it executes queries via RemoteBlockInputStream)
-    if (modified_client_info.initial_query_id.empty())
-        modified_client_info.initial_query_id = query_id;
+    modified_client_info.query_kind = query_kind;
     if (CurrentThread::isInitialized())
     {
         modified_client_info.client_trace_context = CurrentThread::get().thread_trace_context;
diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h
index 655bd5603de..78bc9f611ab 100644
--- a/src/QueryPipeline/RemoteQueryExecutor.h
+++ b/src/QueryPipeline/RemoteQueryExecutor.h
@@ -83,7 +83,13 @@ public:
     ~RemoteQueryExecutor();
 
     /// Create connection and send query, external tables and scalars.
-    void sendQuery();
+    ///
+    /// @param query_kind - kind of query, usually SECONDARY_QUERY,
+    ///                     since these are queries sent between servers
+    ///                     (which is what this code was primarily written for).
+    ///                     But clickhouse-benchmark uses the same code
+    ///                     and should pass INITIAL_QUERY.
+    void sendQuery(ClientInfo::QueryKind query_kind = ClientInfo::QueryKind::SECONDARY_QUERY);
 
     /// Query is resent to a replica, the query itself can be modified.
     std::atomic<bool> resent_query { false };
diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.reference b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
new file mode 100755
index 00000000000..2a163746e20
--- /dev/null
+++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh
@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+
+CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CUR_DIR"/../shell_config.sh
+
+$CLICKHOUSE_CLIENT -nm -q "
+drop table if exists data_02226;
+create table data_02226 (key Int) engine=MergeTree() order by key
+as select * from numbers(1);
+"
+
+# Regression for:
+#
+# Logical error: 'Coordinator for parallel reading from replicas is not initialized'.
opts=(
+    --allow_experimental_parallel_reading_from_replicas 1
+    --max_parallel_replicas 3
+
+    --iterations 1
+)
+$CLICKHOUSE_BENCHMARK --query "select * from remote('127.1', $CLICKHOUSE_DATABASE, data_02226)" "${opts[@]}" >& /dev/null
+ret=$?
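+# Keep the benchmark exit status; the test table is dropped below before exiting with it.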
+ +$CLICKHOUSE_CLIENT -nm -q " +drop table data_02226; +" + +exit $ret From ced34dea84246c24d776dd58206f79c26ccf7533 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 5 Mar 2022 14:43:06 +0300 Subject: [PATCH 81/87] Take flush_time into account for scheduling background flush of the Buffer Signed-off-by: Azat Khuzhin --- src/Storages/StorageBuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index f97c09471c3..c1f2e14da7c 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -1000,7 +1000,8 @@ void StorageBuffer::reschedule() size_t min = std::max(min_thresholds.time - time_passed, 1); size_t max = std::max(max_thresholds.time - time_passed, 1); - flush_handle->scheduleAfter(std::min(min, max) * 1000); + size_t flush = std::max(flush_thresholds.time - time_passed, 1); + flush_handle->scheduleAfter(std::min({min, max, flush}) * 1000); } void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const From 132bbce29cf26f2e93de852e6ddce9f2e3f0023d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 18 Feb 2022 17:42:32 +0300 Subject: [PATCH 82/87] Add ability to get SELECT query from TableFunctionView Signed-off-by: Azat Khuzhin --- src/TableFunctions/TableFunctionView.cpp | 6 ++++++ src/TableFunctions/TableFunctionView.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 2cab8aeca25..e9fcbb219a3 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -15,6 +15,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + +const ASTSelectWithUnionQuery & TableFunctionView::getSelectQuery() const +{ + return *create.select; +} + void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) { const auto * function = ast_function->as(); diff --git a/src/TableFunctions/TableFunctionView.h b/src/TableFunctions/TableFunctionView.h index c20b45e7546..4afb049e738 100644 --- a/src/TableFunctions/TableFunctionView.h +++ b/src/TableFunctions/TableFunctionView.h @@ -16,6 +16,9 @@ class TableFunctionView : public ITableFunction public: static constexpr auto name = "view"; std::string getName() const override { return name; } + + const ASTSelectWithUnionQuery & getSelectQuery() const; + private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; const char * getStorageTypeName() const override { return "View"; } From fd3f7347f3e5f9dbc4f1f7279be8687ff85a0560 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 19 Feb 2022 10:48:06 +0300 Subject: [PATCH 83/87] Remove unused DBMS_COMMON_LIBRARIES Fixes: 4f8438bb346 ("cmake: do not allow internal libstdc++ usage") Signed-off-by: Azat Khuzhin --- src/CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b99ffd7ee18..0a831950d5e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -253,18 +253,16 @@ if (TARGET ch_contrib::nuraft) add_object_library(clickhouse_coordination Coordination) endif() -set (DBMS_COMMON_LIBRARIES) - if (USE_STATIC_LIBRARIES OR NOT SPLIT_SHARED_LIBRARIES) add_library (dbms STATIC ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PRIVATE ch_contrib::libdivide ${DBMS_COMMON_LIBRARIES}) + target_link_libraries 
(dbms PRIVATE ch_contrib::libdivide) if (TARGET ch_contrib::jemalloc) target_link_libraries (dbms PRIVATE ch_contrib::jemalloc) endif() set (all_modules dbms) else() add_library (dbms SHARED ${dbms_headers} ${dbms_sources}) - target_link_libraries (dbms PUBLIC ${all_modules} ${DBMS_COMMON_LIBRARIES}) + target_link_libraries (dbms PUBLIC ${all_modules}) target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::libdivide) if (TARGET ch_contrib::jemalloc) target_link_libraries (clickhouse_interpreters PRIVATE ch_contrib::jemalloc) From 75da778d10fd005703523ddd9837617803be0d69 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 19 Feb 2022 11:02:45 +0300 Subject: [PATCH 84/87] Tiny cmake refactoring Signed-off-by: Azat Khuzhin --- src/CMakeLists.txt | 22 +++++++++++++++++----- src/TableFunctions/CMakeLists.txt | 16 +++++++++------- 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0a831950d5e..ebcd027ed2b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -144,7 +144,6 @@ list (APPEND dbms_sources AggregateFunctions/AggregateFunctionState.cpp AggregateFunctions/AggregateFunctionCount.cpp AggregateFunctions/parseAggregateFunctionParameters.cpp) - list (APPEND dbms_headers AggregateFunctions/IAggregateFunction.h AggregateFunctions/IAggregateFunctionCombinator.h @@ -155,10 +154,23 @@ list (APPEND dbms_headers AggregateFunctions/FactoryHelpers.h AggregateFunctions/parseAggregateFunctionParameters.h) -list (APPEND dbms_sources TableFunctions/ITableFunction.cpp TableFunctions/TableFunctionFactory.cpp) -list (APPEND dbms_headers TableFunctions/ITableFunction.h TableFunctions/TableFunctionFactory.h) -list (APPEND dbms_sources Dictionaries/DictionaryFactory.cpp Dictionaries/DictionarySourceFactory.cpp Dictionaries/DictionaryStructure.cpp Dictionaries/getDictionaryConfigurationFromAST.cpp) -list (APPEND dbms_headers Dictionaries/DictionaryFactory.h Dictionaries/DictionarySourceFactory.h Dictionaries/DictionaryStructure.h Dictionaries/getDictionaryConfigurationFromAST.h) +list (APPEND dbms_sources + TableFunctions/ITableFunction.cpp + TableFunctions/TableFunctionFactory.cpp) +list (APPEND dbms_headers + TableFunctions/ITableFunction.h + TableFunctions/TableFunctionFactory.h) + +list (APPEND dbms_sources + Dictionaries/DictionaryFactory.cpp + Dictionaries/DictionarySourceFactory.cpp + Dictionaries/DictionaryStructure.cpp + Dictionaries/getDictionaryConfigurationFromAST.cpp) +list (APPEND dbms_headers + Dictionaries/DictionaryFactory.h + Dictionaries/DictionarySourceFactory.h + Dictionaries/DictionaryStructure.h + Dictionaries/getDictionaryConfigurationFromAST.h) if (NOT ENABLE_SSL) list (REMOVE_ITEM clickhouse_common_io_sources Common/OpenSSLHelpers.cpp) diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index c9948a4b131..21d329667a5 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -4,14 +4,16 @@ if (TARGET ch_contrib::hivemetastore) add_headers_and_sources(clickhouse_table_functions Hive) endif () -list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp) -list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) +list(REMOVE_ITEM clickhouse_table_functions_sources + ITableFunction.cpp + TableFunctionFactory.cpp) +list(REMOVE_ITEM clickhouse_table_functions_headers + ITableFunction.h + TableFunctionFactory.h) add_library(clickhouse_table_functions 
${clickhouse_table_functions_sources}) + +target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) if (TARGET ch_contrib::hivemetastore) - target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms ch_contrib::hivemetastore ch_contrib::hdfs) -else () - target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) + target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::hivemetastore ch_contrib::hdfs) endif () - - From 4843e210c322b07fcd3a899afa35cf7a31109441 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 18 Feb 2022 17:42:48 +0300 Subject: [PATCH 85/87] Support view() for parallel_distributed_insert_select Signed-off-by: Azat Khuzhin --- src/CMakeLists.txt | 2 ++ src/Storages/StorageDistributed.cpp | 26 ++++++++++---- src/TableFunctions/CMakeLists.txt | 2 ++ ...l_distributed_insert_select_view.reference | 4 +++ ...parallel_distributed_insert_select_view.sh | 35 +++++++++++++++++++ 5 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference create mode 100755 tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ebcd027ed2b..e18914740ff 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -156,9 +156,11 @@ list (APPEND dbms_headers list (APPEND dbms_sources TableFunctions/ITableFunction.cpp + TableFunctions/TableFunctionView.cpp TableFunctions/TableFunctionFactory.cpp) list (APPEND dbms_headers TableFunctions/ITableFunction.h + TableFunctions/TableFunctionView.h TableFunctions/TableFunctionFactory.h) list (APPEND dbms_sources diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index fcbf22bbd33..5bfb3b4ce45 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -56,6 +56,8 @@ #include #include #include +#include +#include #include #include @@ -723,15 +725,27 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer storage_src = std::dynamic_pointer_cast(joined_tables.getLeftTableStorage()); if (storage_src) { - const auto select_with_union_query = std::make_shared(); - select_with_union_query->list_of_selects = std::make_shared(); + /// Unwrap view() function. 
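+            /// If the source table came from the view(...) table function,
+            /// remote_table_function_ptr holds its AST; it is resolved via
+            /// TableFunctionFactory below so that the inner SELECT can be
+            /// reused directly in the rewritten query.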
+ if (storage_src->remote_table_function_ptr) + { + const TableFunctionPtr src_table_function = + TableFunctionFactory::instance().get(storage_src->remote_table_function_ptr, local_context); + const TableFunctionView * view_function = + assert_cast(src_table_function.get()); + new_query->select = view_function->getSelectQuery().clone(); + } + else + { + const auto select_with_union_query = std::make_shared(); + select_with_union_query->list_of_selects = std::make_shared(); - auto new_select_query = std::dynamic_pointer_cast(select_query->clone()); - select_with_union_query->list_of_selects->children.push_back(new_select_query); + auto new_select_query = std::dynamic_pointer_cast(select_query->clone()); + select_with_union_query->list_of_selects->children.push_back(new_select_query); - new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName()); + new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName()); - new_query->select = select_with_union_query; + new_query->select = select_with_union_query; + } } } } diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index 21d329667a5..c58f93e310a 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -6,9 +6,11 @@ endif () list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp + TableFunctionView.cpp TableFunctionFactory.cpp) list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h + TableFunctionView.h TableFunctionFactory.h) add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) diff --git a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh new file mode 100755 index 00000000000..376a49fd820 --- /dev/null +++ b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# NOTE: sh test is required since view() does not have current database + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists dst_02225; +drop table if exists src_02225; +create table dst_02225 (key Int) engine=Memory(); +create table src_02225 (key Int) engine=Memory(); +insert into src_02225 values (1); +" + +$CLICKHOUSE_CLIENT --param_database=$CLICKHOUSE_DATABASE -nm -q " +truncate table dst_02225; +insert into function remote('127.{1,2}', currentDatabase(), dst_02225, key) +select * from remote('127.{1,2}', view(select * from {database:Identifier}.src_02225), key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02225; + +-- w/o sharding key +truncate table dst_02225; +insert into function remote('127.{1,2}', currentDatabase(), dst_02225, key) +select * from remote('127.{1,2}', view(select * from {database:Identifier}.src_02225)) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02225; +" + +$CLICKHOUSE_CLIENT -nm -q " +drop table src_02225; +drop table dst_02225; +" From a19224bc9b3305a8381c8b758315a7b0ac998da7 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Wed, 9 Mar 2022 15:48:42 +0800 Subject: [PATCH 86/87] Fix partition pruner: non-monotonic function IN --- src/Functions/IFunction.h | 4 ++-- src/Interpreters/Set.cpp | 5 +++-- src/Interpreters/Set.h | 2 +- src/Storages/MergeTree/KeyCondition.cpp | 4 ++-- .../02232_partition_pruner_single_point.reference | 2 ++ .../02232_partition_pruner_single_point.sql | 14 ++++++++++++++ 6 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 tests/queries/0_stateless/02232_partition_pruner_single_point.reference create mode 100644 tests/queries/0_stateless/02232_partition_pruner_single_point.sql diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 8063ad77ad0..71af6149774 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -267,7 +267,7 @@ public: */ virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { - throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Function " + getName() + " has no information about its monotonicity", ErrorCodes::NOT_IMPLEMENTED); } }; @@ -452,7 +452,7 @@ public: using Monotonicity = IFunctionBase::Monotonicity; virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { - throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Function " + getName() + " has no information about its monotonicity", ErrorCodes::NOT_IMPLEMENTED); } /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). 
diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 32dac7f9e9b..7af3e23d0d4 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -445,7 +445,7 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector & key_ranges, const DataTypes & data_types) const +BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, const DataTypes & data_types, bool single_point) const { size_t tuple_size = indexes_mapping.size(); @@ -468,7 +468,8 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, std::optional new_range = KeyCondition::applyMonotonicFunctionsChainToRange( key_ranges[indexes_mapping[i].key_index], indexes_mapping[i].functions, - data_types[indexes_mapping[i].key_index]); + data_types[indexes_mapping[i].key_index], + single_point); if (!new_range) return {true, true}; diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 3146b6af03f..2eecb0211a4 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -214,7 +214,7 @@ public: bool hasMonotonicFunctionsChain() const; - BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types) const; + BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types, bool single_point = false) const; private: // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element. diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 323b59e2902..c17eb5a981e 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -448,7 +448,7 @@ KeyCondition::KeyCondition( { for (size_t i = 0, size = key_column_names.size(); i < size; ++i) { - std::string name = key_column_names[i]; + const auto & name = key_column_names[i]; if (!key_columns.count(name)) key_columns[name] = i; } @@ -1999,7 +1999,7 @@ BoolMask KeyCondition::checkInHyperrectangle( if (!element.set_index) throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); - rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types)); + rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types, single_point)); if (element.function == RPNElement::FUNCTION_NOT_IN_SET) rpn_stack.back() = !rpn_stack.back(); } diff --git a/tests/queries/0_stateless/02232_partition_pruner_single_point.reference b/tests/queries/0_stateless/02232_partition_pruner_single_point.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/02232_partition_pruner_single_point.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/02232_partition_pruner_single_point.sql b/tests/queries/0_stateless/02232_partition_pruner_single_point.sql new file mode 100644 index 00000000000..0400d0e1b59 --- /dev/null +++ b/tests/queries/0_stateless/02232_partition_pruner_single_point.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS lower_test; + +CREATE TABLE lower_test ( + a Int32, + b String +) ENGINE=MergeTree +PARTITION BY b +ORDER BY a; + +INSERT INTO lower_test (a,b) VALUES (1,'A'),(2,'B'),(3,'C'); + +SELECT a FROM lower_test WHERE lower(b) IN ('a','b') order by a; + +DROP TABLE lower_test; From d749295222fbfae16ea0bd82c4b16cdbd5b2f95f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 9 Mar 2022 12:35:23 +0100 Subject: [PATCH 87/87] Fix hardcoded page size (#35129) --- src/Common/PODArray.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff 
--git a/src/Common/PODArray.h b/src/Common/PODArray.h index b312fbda21c..9b15782a231 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -196,7 +197,7 @@ protected: /// The operation is slow and performed only for debug builds. void protectImpl(int prot) { - static constexpr size_t PROTECT_PAGE_SIZE = 4096; + static size_t PROTECT_PAGE_SIZE = ::getPageSize(); char * left_rounded_up = reinterpret_cast((reinterpret_cast(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE); char * right_rounded_down = reinterpret_cast((reinterpret_cast(c_end_of_storage) + pad_right) / PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);