diff --git a/README.md b/README.md index adb1c166f12..0b871fb428d 100644 --- a/README.md +++ b/README.md @@ -13,5 +13,6 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events * [ClickHouse on HighLoad++ Siberia](https://www.highload.ru/siberia/2019/abstracts/5348) on June 24-25. -* [ClickHouse Community Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20. -* [ClickHouse Community Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27. +* [ClickHouse Meetup in Novosibirsk](https://events.yandex.ru/events/ClickHouse/26-June-2019/) on June 26. +* [ClickHouse Meetup in Shenzhen](https://www.huodongxing.com/event/3483759917300) on October 20. +* [ClickHouse Meetup in Shanghai](https://www.huodongxing.com/event/4483760336000) on October 27. diff --git a/dbms/programs/benchmark/Benchmark.cpp b/dbms/programs/benchmark/Benchmark.cpp index 85206f1330a..019080e2391 100644 --- a/dbms/programs/benchmark/Benchmark.cpp +++ b/dbms/programs/benchmark/Benchmark.cpp @@ -54,10 +54,10 @@ public: const String & host_, UInt16 port_, bool secure_, const String & default_database_, const String & user_, const String & password_, const String & stage, bool randomize_, size_t max_iterations_, double max_time_, - const String & json_path_, const ConnectionTimeouts & timeouts, const Settings & settings_) + const String & json_path_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), - connections(concurrency, host_, port_, default_database_, user_, password_, timeouts, "benchmark", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable), + connections(concurrency, host_, port_, default_database_, user_, password_, "benchmark", Protocol::Compression::Enable, secure_ ? 
Protocol::Secure::Enable : Protocol::Secure::Disable), randomize(randomize_), max_iterations(max_iterations_), max_time(max_time_), json_path(json_path_), settings(settings_), global_context(Context::createGlobal()), pool(concurrency) { @@ -240,7 +240,8 @@ private: std::uniform_int_distribution<size_t> distribution(0, queries.size() - 1); for (size_t i = 0; i < concurrency; ++i) - pool.schedule(std::bind(&Benchmark::thread, this, connections.get())); + pool.schedule(std::bind(&Benchmark::thread, this, + connections.get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings)))); InterruptListener interrupt_listener; info_per_interval.watch.restart(); @@ -310,7 +311,9 @@ private: void execute(ConnectionPool::Entry & connection, Query & query) { Stopwatch watch; - RemoteBlockInputStream stream(*connection, query, {}, global_context, &settings, nullptr, Tables(), query_processing_stage); + RemoteBlockInputStream stream( + *connection, + query, {}, global_context, &settings, nullptr, Tables(), query_processing_stage); Progress progress; stream.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); @@ -485,7 +488,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["iterations"].as<size_t>(), options["timelimit"].as<double>(), options["json"].as<std::string>(), - ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings), settings); return benchmark.run(); } diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 2c421cc20b3..69cdf5b1355 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -204,7 +204,6 @@ private: ConnectionParameters connection_parameters; - void initialize(Poco::Util::Application & self) { Poco::Util::Application::initialize(self); @@ -337,7 +336,7 @@ private: DateLUT::instance(); if (!context.getSettingsRef().use_client_time_zone) { - const auto & time_zone = connection->getServerTimezone(); + const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts); if (!time_zone.empty()) { try @@ -521,7 +520,6 @@ private: connection_parameters.default_database, connection_parameters.user, connection_parameters.password, - connection_parameters.timeouts, "client", connection_parameters.compression, connection_parameters.security); @@ -537,11 +535,14 @@ private: connection->setThrottler(throttler); } - connection->getServerVersion(server_name, server_version_major, server_version_minor, server_version_patch, server_revision); + connection->getServerVersion(connection_parameters.timeouts, + server_name, server_version_major, server_version_minor, server_version_patch, server_revision); server_version = toString(server_version_major) + "." + toString(server_version_minor) + "."
+ toString(server_version_patch); - if (server_display_name = connection->getServerDisplayName(); server_display_name.length() == 0) + if ( + server_display_name = connection->getServerDisplayName(connection_parameters.timeouts); + server_display_name.length() == 0) { server_display_name = config().getString("host", "localhost"); } @@ -752,7 +753,7 @@ private: } if (!test_hint.checkActual(actual_server_error, actual_client_error, got_exception, last_exception)) - connection->forceConnected(); + connection->forceConnected(connection_parameters.timeouts); if (got_exception && !ignore_error) { @@ -828,7 +829,7 @@ private: if (with_output && with_output->settings_ast) apply_query_settings(*with_output->settings_ast); - connection->forceConnected(); + connection->forceConnected(connection_parameters.timeouts); /// INSERT query for which data transfer is needed (not an INSERT SELECT) is processed separately. if (insert && !insert->select) @@ -899,7 +900,7 @@ private: /// Process the query that doesn't require transferring data blocks to the server. void processOrdinaryQuery() { - connection->sendQuery(query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); + connection->sendQuery(connection_parameters.timeouts, query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); receiveResult(); } @@ -917,7 +918,7 @@ private: if (!parsed_insert_query.data && (is_interactive || (stdin_is_not_tty && std_in.eof()))) throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); - connection->sendQuery(query_without_data, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); + connection->sendQuery(connection_parameters.timeouts, query_without_data, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true); sendExternalTables(); /// Receive description of table structure. @@ -1064,7 +1065,7 @@ private: bool cancelled = false; // TODO: get the poll_interval from commandline. - const auto receive_timeout = connection->getTimeouts().receive_timeout; + const auto receive_timeout = connection_parameters.timeouts.receive_timeout; constexpr size_t default_poll_interval = 1000000; /// in microseconds constexpr size_t min_poll_interval = 5000; /// in microseconds const size_t poll_interval diff --git a/dbms/programs/client/Suggest.h b/dbms/programs/client/Suggest.h index 6120f875d57..975d0611253 100644 --- a/dbms/programs/client/Suggest.h +++ b/dbms/programs/client/Suggest.h @@ -14,6 +14,7 @@ #include #include #include +#include namespace DB @@ -42,7 +43,7 @@ private: "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE" }; - /// Words are fetched asynchonously. + /// Words are fetched asynchronously. 
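/// loading_thread fills the completion words in the background; the atomic `ready` flag signals when they can be used (comment added for clarity; the exact hand-off is an assumption from the surrounding code).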
std::thread loading_thread; std::atomic<bool> ready{false}; @@ -71,7 +72,7 @@ private: return word; } - void loadImpl(Connection & connection, size_t suggestion_limit) + void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) { std::stringstream query; query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" @@ -104,12 +105,12 @@ private: query << ") WHERE notEmpty(res)"; - fetch(connection, query.str()); + fetch(connection, timeouts, query.str()); } - void fetch(Connection & connection, const std::string & query) + void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) { - connection.sendQuery(query); + connection.sendQuery(timeouts, query); while (true) { @@ -175,12 +176,11 @@ public: connection_parameters.default_database, connection_parameters.user, connection_parameters.password, - connection_parameters.timeouts, "client", connection_parameters.compression, connection_parameters.security); - loadImpl(connection, suggestion_limit); + loadImpl(connection, connection_parameters.timeouts, suggestion_limit); } catch (...) { diff --git a/dbms/programs/copier/ClusterCopier.cpp b/dbms/programs/copier/ClusterCopier.cpp index 9cd3629192f..0b78e4a54cf 100644 --- a/dbms/programs/copier/ClusterCopier.cpp +++ b/dbms/programs/copier/ClusterCopier.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -798,13 +799,13 @@ public: } - void discoverShardPartitions(const TaskShardPtr & task_shard) + void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard) { TaskTable & task_table = task_shard->task_table; LOG_INFO(log, "Discover partitions of shard " << task_shard->getDescription()); - auto get_partitions = [&] () { return getShardPartitions(*task_shard); }; + auto get_partitions = [&] () { return getShardPartitions(timeouts, *task_shard); }; auto existing_partitions_names = retry(get_partitions, 60); Strings filtered_partitions_names; Strings missing_partitions; @@ -880,14 +881,14 @@ public: } /// Compute the set of partitions, assuming the set doesn't change during processing - void discoverTablePartitions(TaskTable & task_table, UInt64 num_threads = 0) + void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0) { /// Fetch partitions list from a shard { ThreadPool thread_pool(num_threads ?
num_threads : 2 * getNumberOfPhysicalCPUCores()); for (const TaskShardPtr & task_shard : task_table.all_shards) - thread_pool.schedule([this, task_shard]() { discoverShardPartitions(task_shard); }); + thread_pool.schedule([this, timeouts, task_shard]() { discoverShardPartitions(timeouts, task_shard); }); LOG_DEBUG(log, "Waiting for " << thread_pool.active() << " setup jobs"); thread_pool.wait(); @@ -955,7 +956,7 @@ public: task_descprtion_current_version = version_to_update; } - void process() + void process(const ConnectionTimeouts & timeouts) { for (TaskTable & task_table : task_cluster->table_tasks) { @@ -969,7 +970,7 @@ public: if (!task_table.has_enabled_partitions) { /// If there are no specified enabled_partitions, we must discover them manually - discoverTablePartitions(task_table); + discoverTablePartitions(timeouts, task_table); /// After partitions of each shard are initialized, initialize cluster partitions for (const TaskShardPtr & task_shard : task_table.all_shards) @@ -1009,7 +1010,7 @@ public: bool table_is_done = false; for (UInt64 num_table_tries = 0; num_table_tries < max_table_tries; ++num_table_tries) { - if (tryProcessTable(task_table)) + if (tryProcessTable(timeouts, task_table)) { table_is_done = true; break; @@ -1053,8 +1054,10 @@ protected: return getWorkersPath() + "/" + host_id; } - zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed(const zkutil::ZooKeeperPtr & zookeeper, - const String & description, bool unprioritized) + zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed( + const zkutil::ZooKeeperPtr & zookeeper, + const String & description, + bool unprioritized) { std::chrono::milliseconds current_sleep_time = default_sleep_time; static constexpr std::chrono::milliseconds max_sleep_time(30000); // 30 sec @@ -1329,7 +1332,7 @@ protected: static constexpr UInt64 max_table_tries = 1000; static constexpr UInt64 max_shard_partition_tries = 600; - bool tryProcessTable(TaskTable & task_table) + bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table) { /// A heuristic: if previous shard is already done, then check next one without sleeps due to max_workers constraint bool previous_shard_is_instantly_finished = false; @@ -1360,7 +1363,7 @@ protected: /// If not, did we check existence of that partition previously?
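/// checked_partitions caches the answer, so each shard is asked about a given partition at most once.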
if (shard->checked_partitions.count(partition_name) == 0) { - auto check_shard_has_partition = [&] () { return checkShardHasPartition(*shard, partition_name); }; + auto check_shard_has_partition = [&] () { return checkShardHasPartition(timeouts, *shard, partition_name); }; bool has_partition = retry(check_shard_has_partition); shard->checked_partitions.emplace(partition_name); @@ -1397,7 +1400,7 @@ protected: bool was_error = false; for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num) { - task_status = tryProcessPartitionTask(partition, is_unprioritized_task); + task_status = tryProcessPartitionTask(timeouts, partition, is_unprioritized_task); /// Exit if success if (task_status == PartitionTaskStatus::Finished) @@ -1483,13 +1486,13 @@ protected: Error, }; - PartitionTaskStatus tryProcessPartitionTask(ShardPartition & task_partition, bool is_unprioritized_task) + PartitionTaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, bool is_unprioritized_task) { PartitionTaskStatus res; try { - res = processPartitionTaskImpl(task_partition, is_unprioritized_task); + res = processPartitionTaskImpl(timeouts, task_partition, is_unprioritized_task); } catch (...) { @@ -1510,7 +1513,7 @@ protected: return res; } - PartitionTaskStatus processPartitionTaskImpl(ShardPartition & task_partition, bool is_unprioritized_task) + PartitionTaskStatus processPartitionTaskImpl(const ConnectionTimeouts & timeouts, ShardPartition & task_partition, bool is_unprioritized_task) { TaskShard & task_shard = task_partition.task_shard; TaskTable & task_table = task_shard.task_table; @@ -1611,7 +1614,7 @@ protected: zookeeper->createAncestors(current_task_status_path); /// We need to update table definitions for each partition, it could be changed after ALTER - createShardInternalTables(task_shard); + createShardInternalTables(timeouts, task_shard); /// Check that destination partition is empty if we are first worker /// NOTE: this check is incorrect if pull and push tables have different partition key! 
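/// (With different keys, rows of one source partition may land in several destination partitions, so an empty like-named destination partition proves nothing.)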
@@ -1828,23 +1831,25 @@ protected: return typeid_cast(*block.safeGetByPosition(0).column).getDataAt(0).toString(); } - ASTPtr getCreateTableForPullShard(TaskShard & task_shard) + ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard) { /// Fetch and parse (possibly) new definition - auto connection_entry = task_shard.info.pool->get(&task_cluster->settings_pull); - String create_query_pull_str = getRemoteCreateTable(task_shard.task_table.table_pull, *connection_entry, - &task_cluster->settings_pull); + auto connection_entry = task_shard.info.pool->get(timeouts, &task_cluster->settings_pull); + String create_query_pull_str = getRemoteCreateTable( + task_shard.task_table.table_pull, + *connection_entry, + &task_cluster->settings_pull); ParserCreateQuery parser_create_query; return parseQuery(parser_create_query, create_query_pull_str, 0); } - void createShardInternalTables(TaskShard & task_shard, bool create_split = true) + void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true) { TaskTable & task_table = task_shard.task_table; /// We need to update table definitions for each part, it could be changed after ALTER - task_shard.current_pull_table_create_query = getCreateTableForPullShard(task_shard); + task_shard.current_pull_table_create_query = getCreateTableForPullShard(timeouts, task_shard); /// Create local Distributed tables: /// a table fetching data from current shard and a table inserting data to the whole destination cluster @@ -1872,9 +1877,9 @@ protected: } - std::set getShardPartitions(TaskShard & task_shard) + std::set getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard) { - createShardInternalTables(task_shard, false); + createShardInternalTables(timeouts, task_shard, false); TaskTable & task_table = task_shard.task_table; @@ -1914,9 +1919,9 @@ protected: return res; } - bool checkShardHasPartition(TaskShard & task_shard, const String & partition_quoted_name) + bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name) { - createShardInternalTables(task_shard, false); + createShardInternalTables(timeouts, task_shard, false); TaskTable & task_table = task_shard.task_table; @@ -1998,7 +2003,8 @@ protected: Settings current_settings = settings ? *settings : task_cluster->settings_common; current_settings.max_parallel_replicas = num_remote_replicas ? 
num_remote_replicas : 1; - auto connections = shard.pool->getMany(&current_settings, pool_mode); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings).getSaturated(current_settings.max_execution_time); + auto connections = shard.pool->getMany(timeouts, &current_settings, pool_mode); for (auto & connection : connections) { @@ -2187,7 +2193,7 @@ void ClusterCopierApp::mainImpl() copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false)); copier->init(); - copier->process(); + copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef())); } diff --git a/dbms/programs/performance-test/PerformanceTest.cpp b/dbms/programs/performance-test/PerformanceTest.cpp index 56ac11284e1..c2d8d4f252c 100644 --- a/dbms/programs/performance-test/PerformanceTest.cpp +++ b/dbms/programs/performance-test/PerformanceTest.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ namespace { void waitQuery(Connection & connection) { bool finished = false; + while (true) { if (!connection.poll(1000000)) @@ -50,12 +52,14 @@ namespace fs = boost::filesystem; PerformanceTest::PerformanceTest( const XMLConfigurationPtr & config_, Connection & connection_, + const ConnectionTimeouts & timeouts_, InterruptListener & interrupt_listener_, const PerformanceTestInfo & test_info_, Context & context_, const std::vector<size_t> & queries_to_run_) : config(config_) , connection(connection_) + , timeouts(timeouts_) , interrupt_listener(interrupt_listener_) , test_info(test_info_) , context(context_) @@ -108,7 +112,7 @@ bool PerformanceTest::checkPreconditions() const size_t exist = 0; - connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); while (true) { @@ -188,7 +192,7 @@ void PerformanceTest::prepare() const for (const auto & query : test_info.create_and_fill_queries) { LOG_INFO(log, "Executing create or fill query \"" << query << '\"'); - connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); waitQuery(connection); LOG_INFO(log, "Query finished"); } @@ -200,7 +204,7 @@ void PerformanceTest::finish() const for (const auto & query : test_info.drop_queries) { LOG_INFO(log, "Executing drop query \"" << query << '\"'); - connection.sendQuery(query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); + connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, &test_info.settings, nullptr, false); waitQuery(connection); LOG_INFO(log, "Query finished"); } diff --git a/dbms/programs/performance-test/PerformanceTest.h b/dbms/programs/performance-test/PerformanceTest.h index 6368c1f1040..961a348d099 100644 --- a/dbms/programs/performance-test/PerformanceTest.h +++ b/dbms/programs/performance-test/PerformanceTest.h @@ -5,6 +5,7 @@ #include #include +#include #include "PerformanceTestInfo.h" namespace DB @@ -20,6 +21,7 @@ public: PerformanceTest( const XMLConfigurationPtr & config_, Connection & connection_, + const ConnectionTimeouts & timeouts_, InterruptListener & interrupt_listener_, const PerformanceTestInfo & test_info_, Context & context_, @@ -45,6 +47,7 @@ private: private: XMLConfigurationPtr config; Connection & connection; + const ConnectionTimeouts &
timeouts; InterruptListener & interrupt_listener; PerformanceTestInfo test_info; diff --git a/dbms/programs/performance-test/PerformanceTestSuite.cpp b/dbms/programs/performance-test/PerformanceTestSuite.cpp index ef0ee715c49..cfa7d202d1d 100644 --- a/dbms/programs/performance-test/PerformanceTestSuite.cpp +++ b/dbms/programs/performance-test/PerformanceTestSuite.cpp @@ -72,10 +72,11 @@ public: Strings && tests_names_regexp_, Strings && skip_names_regexp_, const std::unordered_map> query_indexes_, - const ConnectionTimeouts & timeouts) + const ConnectionTimeouts & timeouts_) : connection(host_, port_, default_database_, user_, - password_, timeouts, "performance-test", Protocol::Compression::Enable, + password_, "performance-test", Protocol::Compression::Enable, secure_ ? Protocol::Secure::Enable : Protocol::Secure::Disable) + , timeouts(timeouts_) , tests_tags(std::move(tests_tags_)) , tests_names(std::move(tests_names_)) , tests_names_regexp(std::move(tests_names_regexp_)) @@ -100,7 +101,7 @@ public: UInt64 version_minor; UInt64 version_patch; UInt64 version_revision; - connection.getServerVersion(name, version_major, version_minor, version_patch, version_revision); + connection.getServerVersion(timeouts, name, version_major, version_minor, version_patch, version_revision); std::stringstream ss; ss << version_major << "." << version_minor << "." << version_patch; @@ -115,6 +116,7 @@ public: private: Connection connection; + const ConnectionTimeouts & timeouts; const Strings & tests_tags; const Strings & tests_names; @@ -195,7 +197,7 @@ private: { PerformanceTestInfo info(test_config, profiles_file, global_context.getSettingsRef()); LOG_INFO(log, "Config for test '" << info.test_name << "' parsed"); - PerformanceTest current(test_config, connection, interrupt_listener, info, global_context, query_indexes[info.path]); + PerformanceTest current(test_config, connection, timeouts, interrupt_listener, info, global_context, query_indexes[info.path]); if (current.checkPreconditions()) { diff --git a/dbms/programs/server/MySQLHandler.cpp b/dbms/programs/server/MySQLHandler.cpp index 0e832f619fc..a935f13e82a 100644 --- a/dbms/programs/server/MySQLHandler.cpp +++ b/dbms/programs/server/MySQLHandler.cpp @@ -48,6 +48,7 @@ MySQLHandler::MySQLHandler(IServer & server_, const Poco::Net::StreamSocket & so void MySQLHandler::run() { connection_context = server.context(); + connection_context.setSessionContext(connection_context); connection_context.setDefaultFormat("MySQLWire"); in = std::make_shared(socket()); @@ -306,7 +307,7 @@ void MySQLHandler::authenticate(const HandshakeResponse & handshake_response, co try { connection_context.setUser(handshake_response.username, password, socket().address(), ""); - connection_context.setCurrentDatabase(handshake_response.database); + if (!handshake_response.database.empty()) connection_context.setCurrentDatabase(handshake_response.database); connection_context.setCurrentQueryId(""); LOG_ERROR(log, "Authentication for user " << handshake_response.username << " succeeded."); } diff --git a/dbms/programs/server/MySQLHandler.h b/dbms/programs/server/MySQLHandler.h index 79cba54e656..f55906f7428 100644 --- a/dbms/programs/server/MySQLHandler.h +++ b/dbms/programs/server/MySQLHandler.h @@ -43,8 +43,8 @@ private: size_t connection_id = 0; - size_t server_capability_flags; - size_t client_capability_flags; + size_t server_capability_flags = 0; + size_t client_capability_flags = 0; RSA & public_key; RSA & private_key; diff --git 
a/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index a7b096a69d3..69b8a25e5c3 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -411,6 +411,7 @@ public: return "covarSamp"; if constexpr (StatFunc::kind == StatisticsFunctionKind::corr) return "corr"; + __builtin_unreachable(); } DataTypePtr getReturnType() const override diff --git a/dbms/src/Client/Connection.cpp b/dbms/src/Client/Connection.cpp index a76803baef2..9651ef54e1b 100644 --- a/dbms/src/Client/Connection.cpp +++ b/dbms/src/Client/Connection.cpp @@ -48,7 +48,7 @@ namespace ErrorCodes } -void Connection::connect() +void Connection::connect(const ConnectionTimeouts & timeouts) { try { @@ -230,10 +230,15 @@ UInt16 Connection::getPort() const return port; } -void Connection::getServerVersion(String & name, UInt64 & version_major, UInt64 & version_minor, UInt64 & version_patch, UInt64 & revision) +void Connection::getServerVersion(const ConnectionTimeouts & timeouts, + String & name, + UInt64 & version_major, + UInt64 & version_minor, + UInt64 & version_patch, + UInt64 & revision) { if (!connected) - connect(); + connect(timeouts); name = server_name; version_major = server_version_major; @@ -242,40 +247,40 @@ void Connection::getServerVersion(String & name, UInt64 & version_major, UInt64 revision = server_revision; } -UInt64 Connection::getServerRevision() +UInt64 Connection::getServerRevision(const ConnectionTimeouts & timeouts) { if (!connected) - connect(); + connect(timeouts); return server_revision; } -const String & Connection::getServerTimezone() +const String & Connection::getServerTimezone(const ConnectionTimeouts & timeouts) { if (!connected) - connect(); + connect(timeouts); return server_timezone; } -const String & Connection::getServerDisplayName() +const String & Connection::getServerDisplayName(const ConnectionTimeouts & timeouts) { if (!connected) - connect(); + connect(timeouts); return server_display_name; } -void Connection::forceConnected() +void Connection::forceConnected(const ConnectionTimeouts & timeouts) { if (!connected) { - connect(); + connect(timeouts); } else if (!ping()) { LOG_TRACE(log_wrapper.get(), "Connection was closed, will reconnect."); - connect(); + connect(timeouts); } } @@ -318,10 +323,11 @@ bool Connection::ping() return true; } -TablesStatusResponse Connection::getTablesStatus(const TablesStatusRequest & request) +TablesStatusResponse Connection::getTablesStatus(const ConnectionTimeouts & timeouts, + const TablesStatusRequest & request) { if (!connected) - connect(); + connect(timeouts); TimeoutSetter timeout_setter(*socket, sync_request_timeout, true); @@ -344,6 +350,7 @@ TablesStatusResponse Connection::getTablesStatus(const TablesStatusRequest & req void Connection::sendQuery( + const ConnectionTimeouts & timeouts, const String & query, const String & query_id_, UInt64 stage, @@ -352,7 +359,9 @@ void Connection::sendQuery( bool with_pending_data) { if (!connected) - connect(); + connect(timeouts); + + TimeoutSetter timeout_setter(*socket, timeouts.send_timeout, timeouts.receive_timeout, true); if (settings) { diff --git a/dbms/src/Client/Connection.h b/dbms/src/Client/Connection.h index 4139bcacae2..2338e4c8965 100644 --- a/dbms/src/Client/Connection.h +++ b/dbms/src/Client/Connection.h @@ -57,7 +57,6 @@ public: Connection(const String & host_, UInt16 port_, const String & default_database_, const String & 
user_, const String & password_, - const ConnectionTimeouts & timeouts_, const String & client_name_ = "client", Protocol::Compression compression_ = Protocol::Compression::Enable, Protocol::Secure secure_ = Protocol::Secure::Disable, @@ -68,7 +67,6 @@ public: client_name(client_name_), compression(compression_), secure(secure_), - timeouts(timeouts_), sync_request_timeout(sync_request_timeout_), log_wrapper(*this) { @@ -106,11 +104,16 @@ public: /// Change default database. Changes will take effect on next reconnect. void setDefaultDatabase(const String & database); - void getServerVersion(String & name, UInt64 & version_major, UInt64 & version_minor, UInt64 & version_patch, UInt64 & revision); - UInt64 getServerRevision(); + void getServerVersion(const ConnectionTimeouts & timeouts, + String & name, + UInt64 & version_major, + UInt64 & version_minor, + UInt64 & version_patch, + UInt64 & revision); + UInt64 getServerRevision(const ConnectionTimeouts & timeouts); - const String & getServerTimezone(); - const String & getServerDisplayName(); + const String & getServerTimezone(const ConnectionTimeouts & timeouts); + const String & getServerDisplayName(const ConnectionTimeouts & timeouts); /// For log and exception messages. const String & getDescription() const; @@ -118,14 +121,9 @@ public: UInt16 getPort() const; const String & getDefaultDatabase() const; - /// For proper polling. - inline const auto & getTimeouts() const - { - return timeouts; - } - /// If last flag is true, you need to call sendExternalTablesData after. void sendQuery( + const ConnectionTimeouts & timeouts, const String & query, const String & query_id_ = "", UInt64 stage = QueryProcessingStage::Complete, @@ -156,9 +154,10 @@ public: Packet receivePacket(); /// If not connected yet, or if connection is broken - then connect. If cannot connect - throw an exception. - void forceConnected(); + void forceConnected(const ConnectionTimeouts & timeouts); - TablesStatusResponse getTablesStatus(const TablesStatusRequest & request); + TablesStatusResponse getTablesStatus(const ConnectionTimeouts & timeouts, + const TablesStatusRequest & request); /** Disconnect. * This may be used, if connection is left in unsynchronised state @@ -216,7 +215,6 @@ private: */ ThrottlerPtr throttler; - ConnectionTimeouts timeouts; Poco::Timespan sync_request_timeout; /// From where to read query execution result. @@ -252,7 +250,7 @@ private: LoggerWrapper log_wrapper; - void connect(); + void connect(const ConnectionTimeouts & timeouts); void sendHello(); void receiveHello(); bool ping(); diff --git a/dbms/src/Client/ConnectionPool.h b/dbms/src/Client/ConnectionPool.h index 83d527a9e28..d18be29b2b3 100644 --- a/dbms/src/Client/ConnectionPool.h +++ b/dbms/src/Client/ConnectionPool.h @@ -30,7 +30,9 @@ public: /// Selects the connection to work. /// If force_connected is false, the client must manually ensure that returned connection is good. 
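/// Timeouts are now supplied explicitly per call, since Connection no longer stores them.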
- virtual Entry get(const Settings * settings = nullptr, bool force_connected = true) = 0; + virtual Entry get(const ConnectionTimeouts & timeouts, + const Settings * settings = nullptr, + bool force_connected = true) = 0; }; using ConnectionPoolPtr = std::shared_ptr; @@ -50,7 +52,6 @@ public: const String & default_database_, const String & user_, const String & password_, - const ConnectionTimeouts & timeouts, const String & client_name_ = "client", Protocol::Compression compression_ = Protocol::Compression::Enable, Protocol::Secure secure_ = Protocol::Secure::Disable) @@ -63,12 +64,13 @@ public: password(password_), client_name(client_name_), compression(compression_), - secure{secure_}, - timeouts(timeouts) + secure{secure_} { } - Entry get(const Settings * settings = nullptr, bool force_connected = true) override + Entry get(const ConnectionTimeouts & timeouts, + const Settings * settings = nullptr, + bool force_connected = true) override { Entry entry; if (settings) @@ -77,7 +79,7 @@ public: entry = Base::get(-1); if (force_connected) - entry->forceConnected(); + entry->forceConnected(timeouts); return entry; } @@ -93,7 +95,7 @@ protected: { return std::make_shared( host, port, - default_database, user, password, timeouts, + default_database, user, password, client_name, compression, secure); } @@ -108,7 +110,6 @@ private: Protocol::Compression compression; /// Whether to compress data when interacting with the server. Protocol::Secure secure; /// Whether to encrypt data when interacting with the server. - ConnectionTimeouts timeouts; }; } diff --git a/dbms/src/Client/ConnectionPoolWithFailover.cpp b/dbms/src/Client/ConnectionPoolWithFailover.cpp index 9c12ed31560..f746d3f074e 100644 --- a/dbms/src/Client/ConnectionPoolWithFailover.cpp +++ b/dbms/src/Client/ConnectionPoolWithFailover.cpp @@ -8,6 +8,8 @@ #include #include +#include + namespace ProfileEvents { @@ -29,9 +31,8 @@ namespace ErrorCodes ConnectionPoolWithFailover::ConnectionPoolWithFailover( ConnectionPoolPtrs nested_pools_, LoadBalancing load_balancing, - size_t max_tries_, time_t decrease_error_period_) - : Base(std::move(nested_pools_), max_tries_, decrease_error_period_, &Logger::get("ConnectionPoolWithFailover")) + : Base(std::move(nested_pools_), decrease_error_period_, &Logger::get("ConnectionPoolWithFailover")) , default_load_balancing(load_balancing) { const std::string & local_hostname = getFQDNOrHostName(); @@ -44,11 +45,13 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( } } -IConnectionPool::Entry ConnectionPoolWithFailover::get(const Settings * settings, bool /*force_connected*/) +IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts & timeouts, + const Settings * settings, + bool /*force_connected*/) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) { - return tryGetEntry(pool, fail_message, settings); + return tryGetEntry(pool, timeouts, fail_message, settings); }; GetPriorityFunc get_priority; @@ -70,11 +73,13 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const Settings * settings return Base::get(try_get_entry, get_priority); } -std::vector ConnectionPoolWithFailover::getMany(const Settings * settings, PoolMode pool_mode) +std::vector ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts, + const Settings * settings, + PoolMode pool_mode) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) { - return tryGetEntry(pool, fail_message, settings); + return tryGetEntry(pool, 
timeouts, fail_message, settings); }; std::vector results = getManyImpl(settings, pool_mode, try_get_entry); @@ -86,22 +91,27 @@ std::vector ConnectionPoolWithFailover::getMany(const Se return entries; } -std::vector ConnectionPoolWithFailover::getManyForTableFunction(const Settings * settings, PoolMode pool_mode) +std::vector ConnectionPoolWithFailover::getManyForTableFunction( + const ConnectionTimeouts & timeouts, + const Settings * settings, + PoolMode pool_mode) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) { - return tryGetEntry(pool, fail_message, settings); + return tryGetEntry(pool, timeouts, fail_message, settings); }; return getManyImpl(settings, pool_mode, try_get_entry); } std::vector ConnectionPoolWithFailover::getManyChecked( - const Settings * settings, PoolMode pool_mode, const QualifiedTableName & table_to_check) + const ConnectionTimeouts & timeouts, + const Settings * settings, PoolMode pool_mode, + const QualifiedTableName & table_to_check) { TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message) { - return tryGetEntry(pool, fail_message, settings, &table_to_check); + return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check); }; return getManyImpl(settings, pool_mode, try_get_entry); @@ -113,6 +123,9 @@ std::vector ConnectionPoolWithFailover::g const TryGetEntryFunc & try_get_entry) { size_t min_entries = (settings && settings->skip_unavailable_shards) ? 0 : 1; + size_t max_tries = (settings ? + size_t{settings->connections_with_failover_max_tries} : + size_t{DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES}); size_t max_entries; if (pool_mode == PoolMode::GET_ALL) { @@ -144,12 +157,13 @@ std::vector ConnectionPoolWithFailover::g bool fallback_to_stale_replicas = settings ? 
bool(settings->fallback_to_stale_replicas_for_distributed_queries) : true; - return Base::getMany(min_entries, max_entries, try_get_entry, get_priority, fallback_to_stale_replicas); + return Base::getMany(min_entries, max_entries, max_tries, try_get_entry, get_priority, fallback_to_stale_replicas); } ConnectionPoolWithFailover::TryResult ConnectionPoolWithFailover::tryGetEntry( IConnectionPool & pool, + const ConnectionTimeouts & timeouts, std::string & fail_message, const Settings * settings, const QualifiedTableName * table_to_check) @@ -157,15 +171,15 @@ ConnectionPoolWithFailover::tryGetEntry( TryResult result; try { - result.entry = pool.get(settings, /* force_connected = */ false); + result.entry = pool.get(timeouts, settings, /* force_connected = */ false); UInt64 server_revision = 0; if (table_to_check) - server_revision = result.entry->getServerRevision(); + server_revision = result.entry->getServerRevision(timeouts); if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) { - result.entry->forceConnected(); + result.entry->forceConnected(timeouts); result.is_usable = true; result.is_up_to_date = true; return result; @@ -176,7 +190,7 @@ ConnectionPoolWithFailover::tryGetEntry( TablesStatusRequest status_request; status_request.tables.emplace(*table_to_check); - TablesStatusResponse status_response = result.entry->getTablesStatus(status_request); + TablesStatusResponse status_response = result.entry->getTablesStatus(timeouts, status_request); auto table_status_it = status_response.table_states_by_id.find(*table_to_check); if (table_status_it == status_response.table_states_by_id.end()) { diff --git a/dbms/src/Client/ConnectionPoolWithFailover.h b/dbms/src/Client/ConnectionPoolWithFailover.h index 62ca75859ba..968d751fb92 100644 --- a/dbms/src/Client/ConnectionPoolWithFailover.h +++ b/dbms/src/Client/ConnectionPoolWithFailover.h @@ -34,21 +34,24 @@ public: ConnectionPoolWithFailover( ConnectionPoolPtrs nested_pools_, LoadBalancing load_balancing, - size_t max_tries_ = DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, time_t decrease_error_period_ = DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD); using Entry = IConnectionPool::Entry; /** Allocates connection to work. */ - Entry get(const Settings * settings = nullptr, bool force_connected = true) override; /// From IConnectionPool + Entry get(const ConnectionTimeouts & timeouts, + const Settings * settings = nullptr, + bool force_connected = true) override; /// From IConnectionPool /** Allocates up to the specified number of connections to work. * Connections provide access to different replicas of one shard. */ - std::vector getMany(const Settings * settings, PoolMode pool_mode); + std::vector getMany(const ConnectionTimeouts & timeouts, + const Settings * settings, PoolMode pool_mode); /// The same as getMany(), but return std::vector. - std::vector getManyForTableFunction(const Settings * settings, PoolMode pool_mode); + std::vector getManyForTableFunction(const ConnectionTimeouts & timeouts, + const Settings * settings, PoolMode pool_mode); using Base = PoolWithFailoverBase; using TryResult = Base::TryResult; @@ -56,7 +59,10 @@ public: /// The same as getMany(), but check that replication delay for table_to_check is acceptable. /// Delay threshold is taken from settings. 
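/// (max_replica_delay_for_distributed_queries).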
std::vector getManyChecked( - const Settings * settings, PoolMode pool_mode, const QualifiedTableName & table_to_check); + const ConnectionTimeouts & timeouts, + const Settings * settings, + PoolMode pool_mode, + const QualifiedTableName & table_to_check); private: /// Get the values of relevant settings and call Base::getMany() @@ -70,6 +76,7 @@ private: /// for this table is not too large. TryResult tryGetEntry( IConnectionPool & pool, + const ConnectionTimeouts & timeouts, std::string & fail_message, const Settings * settings, const QualifiedTableName * table_to_check = nullptr); diff --git a/dbms/src/Client/MultiplexedConnections.cpp b/dbms/src/Client/MultiplexedConnections.cpp index 4c6e8fa72df..5c05ee9c5f5 100644 --- a/dbms/src/Client/MultiplexedConnections.cpp +++ b/dbms/src/Client/MultiplexedConnections.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -73,6 +74,7 @@ void MultiplexedConnections::sendExternalTablesData(std::vectorgetServerRevision() < DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD) + if (replica.connection->getServerRevision(timeouts) < DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD) { /// Disable two-level aggregation due to version incompatibility. modified_settings.group_by_two_level_threshold = 0; @@ -107,13 +109,15 @@ void MultiplexedConnections::sendQuery( for (size_t i = 0; i < num_replicas; ++i) { modified_settings.parallel_replica_offset = i; - replica_states[i].connection->sendQuery(query, query_id, stage, &modified_settings, client_info, with_pending_data); + replica_states[i].connection->sendQuery(timeouts, query, query_id, + stage, &modified_settings, client_info, with_pending_data); } } else { /// Use single replica. - replica_states[0].connection->sendQuery(query, query_id, stage, &modified_settings, client_info, with_pending_data); + replica_states[0].connection->sendQuery(timeouts, query, query_id, stage, + &modified_settings, client_info, with_pending_data); } sent_query = true; diff --git a/dbms/src/Client/MultiplexedConnections.h b/dbms/src/Client/MultiplexedConnections.h index 074a8c8d981..b8567dcd979 100644 --- a/dbms/src/Client/MultiplexedConnections.h +++ b/dbms/src/Client/MultiplexedConnections.h @@ -1,9 +1,10 @@ #pragma once +#include #include #include #include -#include +#include namespace DB { @@ -31,6 +32,7 @@ public: /// Send request to replicas. 
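/// Every replica receives the same query text; parallel_replica_offset is set individually in each replica's copy of the settings.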
void sendQuery( + const ConnectionTimeouts & timeouts, const String & query, const String & query_id = "", UInt64 stage = QueryProcessingStage::Complete, diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index f15d066f8cf..5881f6bc501 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -430,6 +430,7 @@ namespace ErrorCodes extern const int MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES = 453; extern const int OPENSSL_ERROR = 454; extern const int SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY = 455; + extern const int CANNOT_UNLINK = 458; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Common/PoolWithFailoverBase.h b/dbms/src/Common/PoolWithFailoverBase.h index 40f60fb838a..917b9328249 100644 --- a/dbms/src/Common/PoolWithFailoverBase.h +++ b/dbms/src/Common/PoolWithFailoverBase.h @@ -55,11 +55,9 @@ public: PoolWithFailoverBase( NestedPools nested_pools_, - size_t max_tries_, time_t decrease_error_period_, Logger * log_) : nested_pools(std::move(nested_pools_)) - , max_tries(max_tries_) , decrease_error_period(decrease_error_period_) , shared_pool_states(nested_pools.size()) , log(log_) @@ -108,7 +106,7 @@ public: /// The method will throw if it is unable to get min_entries alive connections or /// if fallback_to_stale_replicas is false and it is unable to get min_entries connections to up-to-date replicas. std::vector getMany( - size_t min_entries, size_t max_entries, + size_t min_entries, size_t max_entries, size_t max_tries, const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority = GetPriorityFunc(), bool fallback_to_stale_replicas = true); @@ -125,8 +123,6 @@ protected: NestedPools nested_pools; - const size_t max_tries; - const time_t decrease_error_period; std::mutex pool_states_mutex; @@ -141,7 +137,7 @@ template typename TNestedPool::Entry PoolWithFailoverBase::get(const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority) { - std::vector results = getMany(1, 1, try_get_entry, get_priority); + std::vector results = getMany(1, 1, 1, try_get_entry, get_priority); if (results.empty() || results[0].entry.isNull()) throw DB::Exception( "PoolWithFailoverBase::getMany() returned less than min_entries entries.", @@ -152,7 +148,7 @@ PoolWithFailoverBase::get(const TryGetEntryFunc & try_get_entry, co template std::vector::TryResult> PoolWithFailoverBase::getMany( - size_t min_entries, size_t max_entries, + size_t min_entries, size_t max_entries, size_t max_tries, const TryGetEntryFunc & try_get_entry, const GetPriorityFunc & get_priority, bool fallback_to_stale_replicas) @@ -167,9 +163,9 @@ PoolWithFailoverBase::getMany( struct ShuffledPool { - NestedPool * pool; - const PoolState * state; - size_t index; + NestedPool * pool{}; + const PoolState * state{}; + size_t index = 0; size_t error_count = 0; }; @@ -192,7 +188,7 @@ PoolWithFailoverBase::getMany( size_t up_to_date_count = 0; size_t failed_pools_count = 0; - /// At exit update shared error counts with error counts occured during this call. + /// At exit update shared error counts with error counts occurred during this call. 
SCOPE_EXIT( { std::lock_guard lock(pool_states_mutex); diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 43ee947d338..35a68c5a90c 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -90,12 +90,17 @@ struct NetlinkMessage const char * request_buf = reinterpret_cast(this); ssize_t request_size = header.nlmsg_len; - ::sockaddr_nl nladdr{}; + union + { + ::sockaddr_nl nladdr{}; + ::sockaddr sockaddr; + }; + nladdr.nl_family = AF_NETLINK; while (true) { - ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, reinterpret_cast(&nladdr), sizeof(nladdr)); + ssize_t bytes_sent = ::sendto(fd, request_buf, request_size, 0, &sockaddr, sizeof(nladdr)); if (bytes_sent <= 0) { @@ -236,10 +241,14 @@ TaskStatsInfoGetter::TaskStatsInfoGetter() if (0 != ::setsockopt(netlink_socket_fd, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&tv), sizeof(tv))) throwFromErrno("Can't set timeout on PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); - ::sockaddr_nl addr{}; + union + { + ::sockaddr_nl addr{}; + ::sockaddr sockaddr; + }; addr.nl_family = AF_NETLINK; - if (::bind(netlink_socket_fd, reinterpret_cast(&addr), sizeof(addr)) < 0) + if (::bind(netlink_socket_fd, &sockaddr, sizeof(addr)) < 0) throwFromErrno("Can't bind PF_NETLINK socket", ErrorCodes::NETLINK_ERROR); taskstats_family_id = getFamilyId(netlink_socket_fd); diff --git a/dbms/src/Common/Volnitsky.h b/dbms/src/Common/Volnitsky.h index 646ad57aa1f..907af50aefa 100644 --- a/dbms/src/Common/Volnitsky.h +++ b/dbms/src/Common/Volnitsky.h @@ -187,8 +187,8 @@ namespace VolnitskyTraits /// put ngram for uppercase utf8.convert(u_u32, seq, sizeof(seq)); - chars.c0 = seq[seq_ngram_offset]; - chars.c1 = seq[seq_ngram_offset + 1]; + chars.c0 = seq[seq_ngram_offset]; //-V519 + chars.c1 = seq[seq_ngram_offset + 1]; //-V519 putNGramBase(n, offset); } } diff --git a/dbms/src/Common/tests/thread_creation_latency.cpp b/dbms/src/Common/tests/thread_creation_latency.cpp index 9b551f713c1..120f9bca2cd 100644 --- a/dbms/src/Common/tests/thread_creation_latency.cpp +++ b/dbms/src/Common/tests/thread_creation_latency.cpp @@ -8,18 +8,10 @@ #include -int x = 0; +int value = 0; -void f() { ++x; } - -/*void f() -{ - std::vector vec; - for (size_t i = 0; i < 100; ++i) - vec.push_back(std::string(rand() % 10, ' ')); -}*/ - -void * g(void *) { f(); return {}; } +void f() { ++value; } +void * g(void *) { f(); return {}; } namespace DB @@ -34,7 +26,7 @@ namespace DB template void test(size_t n, const char * name, F && kernel) { - x = 0; + value = 0; Stopwatch watch; Stopwatch watch_one; @@ -62,7 +54,7 @@ void test(size_t n, const char * name, F && kernel) << n / watch.elapsedSeconds() << " ops/sec., " << "avg latency: " << watch.elapsedSeconds() / n * 1000000 << " μs, " << "max latency: " << max_seconds * 1000000 << " μs " - << "(res = " << x << ")" + << "(res = " << value << ")" << std::endl; } @@ -71,13 +63,6 @@ int main(int argc, char ** argv) { size_t n = argc == 2 ? 
DB::parse<size_t>(argv[1]) : 100000; -/* test(n, "Create and destroy boost::threadpool each iteration", [] - { - boost::threadpool::pool tp(1); - tp.schedule(f); - tp.wait(); - });*/ - test(n, "Create and destroy ThreadPool each iteration", [] { ThreadPool tp(1); tp.schedule(f); tp.wait(); }); @@ -100,16 +85,6 @@ int main(int argc, char ** argv) thread.join(); }); -/* { - boost::threadpool::pool tp(1); - - test(n, "Schedule job for boost::threadpool each iteration", [&tp] - { - tp.schedule(f); - tp.wait(); - }); - }*/ - { ThreadPool tp(1); @@ -120,16 +95,6 @@ int main(int argc, char ** argv) }); } -/* { - boost::threadpool::pool tp(128); - - test(n, "Schedule job for boost::threadpool with 128 threads each iteration", [&tp] - { - tp.schedule(f); - tp.wait(); - }); - }*/ - { ThreadPool tp(128); diff --git a/dbms/src/Compression/CompressionCodecT64.cpp b/dbms/src/Compression/CompressionCodecT64.cpp new file mode 100644 index 00000000000..cd369fc9c4e --- /dev/null +++ b/dbms/src/Compression/CompressionCodecT64.cpp @@ -0,0 +1,520 @@ +#include + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int CANNOT_COMPRESS; +extern const int CANNOT_DECOMPRESS; +extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE; +extern const int ILLEGAL_CODEC_PARAMETER; +extern const int LOGICAL_ERROR; +} + +namespace +{ + +UInt8 codecId() +{ + return static_cast<UInt8>(CompressionMethodByte::T64); +} + +TypeIndex baseType(TypeIndex type_idx) +{ + switch (type_idx) + { + case TypeIndex::Int8: + return TypeIndex::Int8; + case TypeIndex::Int16: + return TypeIndex::Int16; + case TypeIndex::Int32: + case TypeIndex::Decimal32: + return TypeIndex::Int32; + case TypeIndex::Int64: + case TypeIndex::Decimal64: + return TypeIndex::Int64; + case TypeIndex::UInt8: + case TypeIndex::Enum8: + return TypeIndex::UInt8; + case TypeIndex::UInt16: + case TypeIndex::Enum16: + case TypeIndex::Date: + return TypeIndex::UInt16; + case TypeIndex::UInt32: + case TypeIndex::DateTime: + return TypeIndex::UInt32; + case TypeIndex::UInt64: + return TypeIndex::UInt64; + default: + break; + } + + return TypeIndex::Nothing; +} + +TypeIndex typeIdx(const DataTypePtr & data_type) +{ + if (!data_type) + return TypeIndex::Nothing; + + WhichDataType which(*data_type); + switch (which.idx) + { + case TypeIndex::Int8: + case TypeIndex::UInt8: + case TypeIndex::Enum8: + case TypeIndex::Int16: + case TypeIndex::UInt16: + case TypeIndex::Enum16: + case TypeIndex::Date: + case TypeIndex::Int32: + case TypeIndex::UInt32: + case TypeIndex::DateTime: + case TypeIndex::Decimal32: + case TypeIndex::Int64: + case TypeIndex::UInt64: + case TypeIndex::Decimal64: + return which.idx; + default: + break; + } + + return TypeIndex::Nothing; +} + +void transpose64x8(const UInt64 * src, UInt64 * dst, UInt32) +{ + auto * src8 = reinterpret_cast<const UInt8 *>(src); + + for (UInt32 i = 0; i < 64; ++i) + { + UInt64 value = src8[i]; + dst[0] |= (value & 0x1) << i; + dst[1] |= ((value >> 1) & 0x1) << i; + dst[2] |= ((value >> 2) & 0x1) << i; + dst[3] |= ((value >> 3) & 0x1) << i; + dst[4] |= ((value >> 4) & 0x1) << i; + dst[5] |= ((value >> 5) & 0x1) << i; + dst[6] |= ((value >> 6) & 0x1) << i; + dst[7] |= ((value >> 7) & 0x1) << i; + } +} + +void revTranspose64x8(const UInt64 * src, UInt64 * dst, UInt32) +{ + auto * dst8 = reinterpret_cast<UInt8 *>(dst); + + for (UInt32 i = 0; i < 64; ++i) + { + dst8[i] = ((src[0] >> i) & 0x1) + | (((src[1] >> i) & 0x1) << 1) + | (((src[2] >> i) & 0x1) << 2) + | (((src[3] >> i) & 0x1) << 3) + | (((src[4] >> i) & 0x1) << 4) + | (((src[5] >> i) &
0x1) << 5) + | (((src[6] >> i) & 0x1) << 6) + | (((src[7] >> i) & 0x1) << 7); + } +} + +template <typename _T> +void transposeBytes(_T value, UInt64 * mx, UInt32 col) +{ + UInt8 * mx8 = reinterpret_cast<UInt8 *>(mx); + const UInt8 * value8 = reinterpret_cast<const UInt8 *>(&value); + + if constexpr (sizeof(_T) > 4) + { + mx8[64 * 7 + col] = value8[7]; + mx8[64 * 6 + col] = value8[6]; + mx8[64 * 5 + col] = value8[5]; + mx8[64 * 4 + col] = value8[4]; + } + + if constexpr (sizeof(_T) > 2) + { + mx8[64 * 3 + col] = value8[3]; + mx8[64 * 2 + col] = value8[2]; + } + + if constexpr (sizeof(_T) > 1) + mx8[64 * 1 + col] = value8[1]; + + mx8[64 * 0 + col] = value8[0]; +} + +template <typename _T> +void revTransposeBytes(const UInt64 * mx, UInt32 col, _T & value) +{ + auto * mx8 = reinterpret_cast<const UInt8 *>(mx); + + if constexpr (sizeof(_T) > 4) + { + value |= UInt64(mx8[64 * 7 + col]) << (8 * 7); + value |= UInt64(mx8[64 * 6 + col]) << (8 * 6); + value |= UInt64(mx8[64 * 5 + col]) << (8 * 5); + value |= UInt64(mx8[64 * 4 + col]) << (8 * 4); + } + + if constexpr (sizeof(_T) > 2) + { + value |= UInt32(mx8[64 * 3 + col]) << (8 * 3); + value |= UInt32(mx8[64 * 2 + col]) << (8 * 2); + } + + if constexpr (sizeof(_T) > 1) + value |= UInt32(mx8[64 * 1 + col]) << (8 * 1); + + value |= UInt32(mx8[col]); +} + + +/// UIntX[64] -> UInt64[N] transposed matrix, N <= X +template <typename _T> +void transpose(const char * src, char * dst, UInt32 num_bits, UInt32 tail = 64) +{ + UInt32 full_bytes = num_bits / 8; + UInt32 part_bits = num_bits % 8; + + UInt64 mx[64] = {}; + const char * ptr = src; + for (UInt32 col = 0; col < tail; ++col, ptr += sizeof(_T)) + { + _T value = unalignedLoad<_T>(ptr); + transposeBytes(value, mx, col); + } + + UInt32 full_size = sizeof(UInt64) * (num_bits - part_bits); + memcpy(dst, mx, full_size); + dst += full_size; + + /// transpose only partially filled last byte + if (part_bits) + { + UInt64 * partial = &mx[full_bytes * 8]; + UInt64 res[8] = {}; + transpose64x8(partial, res, part_bits); + memcpy(dst, res, part_bits * sizeof(UInt64)); + } +} + +/// UInt64[N] transposed matrix -> UIntX[64] +template <typename _T, typename _MinMaxT = std::conditional_t<std::is_signed_v<_T>, Int64, UInt64>> +void revTranspose(const char * src, char * dst, UInt32 num_bits, _MinMaxT min, _MinMaxT max [[maybe_unused]], UInt32 tail = 64) +{ + UInt64 mx[64] = {}; + memcpy(mx, src, num_bits * sizeof(UInt64)); + + UInt32 full_bytes = num_bits / 8; + UInt32 part_bits = num_bits % 8; + UInt64 * partial = &mx[full_bytes * 8]; + + if (part_bits) + { + UInt64 res[8] = {}; + revTranspose64x8(partial, res, part_bits); + memcpy(partial, res, 8 * sizeof(UInt64)); + } + + _T upper_min = 0; + if (num_bits < 64) + upper_min = UInt64(min) >> num_bits << num_bits; + + _T buf[64] = {}; + + if constexpr (std::is_signed_v<_T>) + { + /// Restore some data as negatives and others as positives + if (min < 0 && max >= 0 && num_bits < 64) + { + _T sign_bit = 1ull << (num_bits - 1); + _T upper_max = UInt64(max) >> num_bits << num_bits; + + for (UInt32 col = 0; col < tail; ++col) + { + _T & value = buf[col]; + revTransposeBytes(mx, col, value); + + if (value & sign_bit) + value |= upper_min; + else + value |= upper_max; + } + + memcpy(dst, buf, tail * sizeof(_T)); + return; + } + } + + for (UInt32 col = 0; col < tail; ++col) + { + _T & value = buf[col]; + revTransposeBytes(mx, col, value); + + value |= upper_min; + } + + memcpy(dst, buf, tail * sizeof(_T)); +} + + +UInt32 getValuableBitsNumber(UInt64 min, UInt64 max) +{ + UInt64 diff_bits = min ^ max; + if (diff_bits) + return 64 - __builtin_clzll(diff_bits); + return 0; +} + +UInt32 getValuableBitsNumber(Int64 min, Int64 max) +{ +
if (min < 0 && max >= 0) + { + if (min + max >= 0) + return getValuableBitsNumber(0ull, UInt64(max)) + 1; + else + return getValuableBitsNumber(0ull, UInt64(~min)) + 1; + } + else + return getValuableBitsNumber(UInt64(min), UInt64(max)); +} + + +template <typename _T> +void findMinMax(const char * src, UInt32 src_size, _T & min, _T & max) +{ + min = unalignedLoad<_T>(src); + max = unalignedLoad<_T>(src); + + const char * end = src + src_size; + for (; src < end; src += sizeof(_T)) + { + auto current = unalignedLoad<_T>(src); + if (current < min) + min = current; + if (current > max) + max = current; + } +} + +template <typename _T> +UInt32 compressData(const char * src, UInt32 bytes_size, char * dst) +{ + using MinMaxType = std::conditional_t<std::is_signed_v<_T>, Int64, UInt64>; + + const UInt32 mx_size = 64; + const UInt32 header_size = 2 * sizeof(UInt64); + + if (bytes_size % sizeof(_T)) + throw Exception("Cannot compress, data size " + toString(bytes_size) + " is not a multiple of " + toString(sizeof(_T)), + ErrorCodes::CANNOT_COMPRESS); + + UInt32 src_size = bytes_size / sizeof(_T); + UInt32 num_full = src_size / mx_size; + UInt32 tail = src_size % mx_size; + + _T min, max; + findMinMax<_T>(src, bytes_size, min, max); + MinMaxType min64 = min; + MinMaxType max64 = max; + + /// Write header + { + memcpy(dst, &min64, sizeof(MinMaxType)); + memcpy(dst + 8, &max64, sizeof(MinMaxType)); + dst += header_size; + } + + UInt32 num_bits = getValuableBitsNumber(min64, max64); + if (!num_bits) + return header_size; + + UInt32 src_shift = sizeof(_T) * mx_size; + UInt32 dst_shift = sizeof(UInt64) * num_bits; + for (UInt32 i = 0; i < num_full; ++i) + { + transpose<_T>(src, dst, num_bits); + src += src_shift; + dst += dst_shift; + } + + UInt32 dst_bytes = num_full * dst_shift; + + if (tail) + { + transpose<_T>(src, dst, num_bits, tail); + dst_bytes += dst_shift; + } + + return header_size + dst_bytes; +} + +template <typename _T> +void decompressData(const char * src, UInt32 bytes_size, char * dst, UInt32 uncompressed_size) +{ + using MinMaxType = std::conditional_t<std::is_signed_v<_T>, Int64, UInt64>; + + const UInt32 header_size = 2 * sizeof(UInt64); + + if (bytes_size < header_size) + throw Exception("Cannot decompress, data size " + toString(bytes_size) + " is less than T64 header", + ErrorCodes::CANNOT_DECOMPRESS); + + if (uncompressed_size % sizeof(_T)) + throw Exception("Cannot decompress, unexpected uncompressed size " + toString(uncompressed_size), + ErrorCodes::CANNOT_DECOMPRESS); + + UInt64 num_elements = uncompressed_size / sizeof(_T); + MinMaxType min; + MinMaxType max; + + /// Read header + { + memcpy(&min, src, sizeof(MinMaxType)); + memcpy(&max, src + 8, sizeof(MinMaxType)); + src += header_size; + bytes_size -= header_size; + } + + UInt32 num_bits = getValuableBitsNumber(min, max); + if (!num_bits) + { + _T min_value = min; + for (UInt32 i = 0; i < num_elements; ++i, dst += sizeof(_T)) + unalignedStore(dst, min_value); + return; + } + + UInt32 src_shift = sizeof(UInt64) * num_bits; + UInt32 dst_shift = sizeof(_T) * 64; + + if (!bytes_size || bytes_size % src_shift) + throw Exception("Cannot decompress, data size " + toString(bytes_size) + " is not a multiple of " + toString(src_shift), + ErrorCodes::CANNOT_DECOMPRESS); + + UInt32 num_full = bytes_size / src_shift; + UInt32 tail = num_elements % 64; + if (tail) + --num_full; + + for (UInt32 i = 0; i < num_full; ++i) + { + revTranspose<_T>(src, dst, num_bits, min, max); + src += src_shift; + dst += dst_shift; + } + + if (tail) + revTranspose<_T>(src, dst, num_bits, min, max, tail); +} + +} + +UInt32
+UInt32 CompressionCodecT64::doCompressData(const char * src, UInt32 src_size, char * dst) const
+{
+    memcpy(dst, &type_idx, 1);
+    dst += 1;
+
+    switch (baseType(type_idx))
+    {
+        case TypeIndex::Int8:
+            return 1 + compressData<Int8>(src, src_size, dst);
+        case TypeIndex::Int16:
+            return 1 + compressData<Int16>(src, src_size, dst);
+        case TypeIndex::Int32:
+            return 1 + compressData<Int32>(src, src_size, dst);
+        case TypeIndex::Int64:
+            return 1 + compressData<Int64>(src, src_size, dst);
+        case TypeIndex::UInt8:
+            return 1 + compressData<UInt8>(src, src_size, dst);
+        case TypeIndex::UInt16:
+            return 1 + compressData<UInt16>(src, src_size, dst);
+        case TypeIndex::UInt32:
+            return 1 + compressData<UInt32>(src, src_size, dst);
+        case TypeIndex::UInt64:
+            return 1 + compressData<UInt64>(src, src_size, dst);
+        default:
+            break;
+    }
+
+    throw Exception("Cannot compress with T64", ErrorCodes::CANNOT_COMPRESS);
+}
+
+void CompressionCodecT64::doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const
+{
+    if (!src_size)
+        throw Exception("Cannot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS);
+
+    UInt8 saved_type_id = unalignedLoad<UInt8>(src);
+    src += 1;
+    src_size -= 1;
+
+    TypeIndex actual_type_id = type_idx;
+    if (actual_type_id == TypeIndex::Nothing)
+        actual_type_id = static_cast<TypeIndex>(saved_type_id);
+
+    switch (baseType(actual_type_id))
+    {
+        case TypeIndex::Int8:
+            return decompressData<Int8>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::Int16:
+            return decompressData<Int16>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::Int32:
+            return decompressData<Int32>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::Int64:
+            return decompressData<Int64>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::UInt8:
+            return decompressData<UInt8>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::UInt16:
+            return decompressData<UInt16>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::UInt32:
+            return decompressData<UInt32>(src, src_size, dst, uncompressed_size);
+        case TypeIndex::UInt64:
+            return decompressData<UInt64>(src, src_size, dst, uncompressed_size);
+        default:
+            break;
+    }
+
+    throw Exception("Cannot decompress with T64", ErrorCodes::CANNOT_DECOMPRESS);
+}
+
+void CompressionCodecT64::useInfoAboutType(DataTypePtr data_type)
+{
+    if (data_type)
+    {
+        type_idx = typeIdx(data_type);
+        if (type_idx == TypeIndex::Nothing)
+            throw Exception("T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
+    }
+}
+
+UInt8 CompressionCodecT64::getMethodByte() const
+{
+    return codecId();
+}
+
+void registerCodecT64(CompressionCodecFactory & factory)
+{
+    auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr
+    {
+        if (arguments && !arguments->children.empty())
+            throw Exception("T64 codec should not have parameters", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
+
+        auto type_idx = typeIdx(type);
+        if (type && type_idx == TypeIndex::Nothing)
+            throw Exception("T64 codec is not supported for specified type", ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
+        return std::make_shared<CompressionCodecT64>(type_idx);
+    };
+
+    factory.registerCompressionCodecWithType("T64", codecId(), reg_func);
+}
+}
diff --git a/dbms/src/Compression/CompressionCodecT64.h b/dbms/src/Compression/CompressionCodecT64.h
new file mode 100644
index 00000000000..6425705b73f
--- /dev/null
+++ b/dbms/src/Compression/CompressionCodecT64.h
@@ -0,0 +1,38 @@
+#pragma once
+
+#include <Compression/ICompressionCodec.h>
+#include <DataTypes/IDataType.h>
+
+namespace DB
+{
+
+class CompressionCodecT64 : public ICompressionCodec
+{
+public:
+    static constexpr UInt32 HEADER_SIZE = 1 + 2 * sizeof(UInt64);
+    
static constexpr UInt32 MAX_COMPRESSED_BLOCK_SIZE = sizeof(UInt64) * 64; + + CompressionCodecT64(TypeIndex type_idx_) + : type_idx(type_idx_) + {} + + UInt8 getMethodByte() const override; + String getCodecDesc() const override { return "T64"; } + + void useInfoAboutType(DataTypePtr data_type) override; + +protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; + void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; + + UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override + { + /// uncompressed_size - (uncompressed_size % (sizeof(T) * 64)) + sizeof(UInt64) * sizeof(T) + header_size + return uncompressed_size + MAX_COMPRESSED_BLOCK_SIZE + HEADER_SIZE; + } + +private: + TypeIndex type_idx; +}; + +} diff --git a/dbms/src/Compression/CompressionFactory.cpp b/dbms/src/Compression/CompressionFactory.cpp index e7827d2ebc5..7f9a7a24f09 100644 --- a/dbms/src/Compression/CompressionFactory.cpp +++ b/dbms/src/Compression/CompressionFactory.cpp @@ -137,6 +137,7 @@ void registerCodecZSTD(CompressionCodecFactory & factory); void registerCodecMultiple(CompressionCodecFactory & factory); void registerCodecLZ4HC(CompressionCodecFactory & factory); void registerCodecDelta(CompressionCodecFactory & factory); +void registerCodecT64(CompressionCodecFactory & factory); CompressionCodecFactory::CompressionCodecFactory() { @@ -147,6 +148,7 @@ CompressionCodecFactory::CompressionCodecFactory() registerCodecMultiple(*this); registerCodecLZ4HC(*this); registerCodecDelta(*this); + registerCodecT64(*this); } } diff --git a/dbms/src/Compression/CompressionInfo.h b/dbms/src/Compression/CompressionInfo.h index ddcfebe3b64..430840686c9 100644 --- a/dbms/src/Compression/CompressionInfo.h +++ b/dbms/src/Compression/CompressionInfo.h @@ -40,6 +40,7 @@ enum class CompressionMethodByte : uint8_t ZSTD = 0x90, Multiple = 0x91, Delta = 0x92, + T64 = 0x93, }; } diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.cpp b/dbms/src/DataStreams/RemoteBlockInputStream.cpp index e8a29880aad..740e60ffb09 100644 --- a/dbms/src/DataStreams/RemoteBlockInputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockInputStream.cpp @@ -7,6 +7,8 @@ #include #include +#include + namespace DB { @@ -61,17 +63,17 @@ RemoteBlockInputStream::RemoteBlockInputStream( create_multiplexed_connections = [this, pool, throttler]() { const Settings & current_settings = context.getSettingsRef(); - + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); std::vector connections; if (main_table) { - auto try_results = pool->getManyChecked(¤t_settings, pool_mode, *main_table); + auto try_results = pool->getManyChecked(timeouts, ¤t_settings, pool_mode, *main_table); connections.reserve(try_results.size()); for (auto & try_result : try_results) connections.emplace_back(std::move(try_result.entry)); } else - connections = pool->getMany(¤t_settings, pool_mode); + connections = pool->getMany(timeouts, ¤t_settings, pool_mode); return std::make_unique( std::move(connections), current_settings, throttler); @@ -283,12 +285,14 @@ void RemoteBlockInputStream::sendQuery() { multiplexed_connections = create_multiplexed_connections(); - if (context.getSettingsRef().skip_unavailable_shards && 0 == multiplexed_connections->size()) + const auto& settings = context.getSettingsRef(); + if (settings.skip_unavailable_shards && 0 == multiplexed_connections->size()) return; established = true; - 
multiplexed_connections->sendQuery(query, "", stage, &context.getClientInfo(), true); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); + multiplexed_connections->sendQuery(timeouts, query, "", stage, &context.getClientInfo(), true); established = false; sent_query = true; diff --git a/dbms/src/DataStreams/RemoteBlockInputStream.h b/dbms/src/DataStreams/RemoteBlockInputStream.h index 7f8626398dc..3cef2099030 100644 --- a/dbms/src/DataStreams/RemoteBlockInputStream.h +++ b/dbms/src/DataStreams/RemoteBlockInputStream.h @@ -96,6 +96,7 @@ private: const String query; Context context; + /// Temporary tables needed to be sent to remote servers Tables external_tables; QueryProcessingStage::Enum stage; @@ -118,7 +119,7 @@ private: */ std::atomic finished { false }; - /** Cancel query request was sent to all replicas beacuse data is not needed anymore + /** Cancel query request was sent to all replicas because data is not needed anymore * This behaviour may occur when: * - data size is already satisfactory (when using LIMIT, for example) * - an exception was thrown from client side diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp index ff5fc75f1c4..2f93d88ac7c 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.cpp +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -18,13 +19,16 @@ namespace ErrorCodes } -RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, const String & query_, const Settings * settings_) +RemoteBlockOutputStream::RemoteBlockOutputStream(Connection & connection_, + const ConnectionTimeouts & timeouts, + const String & query_, + const Settings * settings_) : connection(connection_), query(query_), settings(settings_) { /** Send query and receive "header", that describe table structure. * Header is needed to know, what structure is required for blocks to be passed to 'write' method. 
*/ - connection.sendQuery(query, "", QueryProcessingStage::Complete, settings, nullptr); + connection.sendQuery(timeouts, query, "", QueryProcessingStage::Complete, settings, nullptr); while (true) { diff --git a/dbms/src/DataStreams/RemoteBlockOutputStream.h b/dbms/src/DataStreams/RemoteBlockOutputStream.h index 41740c39837..8887277c657 100644 --- a/dbms/src/DataStreams/RemoteBlockOutputStream.h +++ b/dbms/src/DataStreams/RemoteBlockOutputStream.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -18,7 +19,10 @@ struct Settings; class RemoteBlockOutputStream : public IBlockOutputStream { public: - RemoteBlockOutputStream(Connection & connection_, const String & query_, const Settings * settings_ = nullptr); + RemoteBlockOutputStream(Connection & connection_, + const ConnectionTimeouts & timeouts, + const String & query_, + const Settings * settings_ = nullptr); Block getHeader() const override { return header; } diff --git a/dbms/src/DataStreams/UnionBlockInputStream.h b/dbms/src/DataStreams/UnionBlockInputStream.h index 4eea1817cfb..c4e84e85845 100644 --- a/dbms/src/DataStreams/UnionBlockInputStream.h +++ b/dbms/src/DataStreams/UnionBlockInputStream.h @@ -43,10 +43,13 @@ private: public: using ExceptionCallback = std::function; - UnionBlockInputStream(BlockInputStreams inputs, BlockInputStreamPtr additional_input_at_end, size_t max_threads, - ExceptionCallback exception_callback_ = ExceptionCallback()) : - output_queue(std::min(inputs.size(), max_threads)), - handler(*this), + UnionBlockInputStream( + BlockInputStreams inputs, + BlockInputStreamPtr additional_input_at_end, + size_t max_threads, + ExceptionCallback exception_callback_ = ExceptionCallback() + ) : + output_queue(std::min(inputs.size(), max_threads)), handler(*this), processor(inputs, additional_input_at_end, max_threads, handler), exception_callback(exception_callback_) { diff --git a/dbms/src/DataTypes/tests/CMakeLists.txt b/dbms/src/DataTypes/tests/CMakeLists.txt index aa4cb34620b..e56dd166ea3 100644 --- a/dbms/src/DataTypes/tests/CMakeLists.txt +++ b/dbms/src/DataTypes/tests/CMakeLists.txt @@ -1,11 +1,5 @@ - add_executable (data_types_number_fixed data_types_number_fixed.cpp) target_link_libraries (data_types_number_fixed PRIVATE dbms) add_executable (data_type_string data_type_string.cpp) target_link_libraries (data_type_string PRIVATE dbms) - -if(USE_GTEST) - add_executable(data_type_get_common_type data_type_get_common_type.cpp) - target_link_libraries(data_type_get_common_type PRIVATE dbms ${GTEST_BOTH_LIBRARIES}) -endif() diff --git a/dbms/src/DataTypes/tests/data_type_get_common_type.cpp b/dbms/src/DataTypes/tests/data_type_get_common_type.cpp deleted file mode 100644 index 1d93146ba2e..00000000000 --- a/dbms/src/DataTypes/tests/data_type_get_common_type.cpp +++ /dev/null @@ -1,166 +0,0 @@ -#include -#include -#include - -#include - -#pragma GCC diagnostic ignored "-Wsign-compare" -#ifdef __clang__ - #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" - #pragma clang diagnostic ignored "-Wundef" -#endif -#include - - -using namespace DB; - - -TEST(data_type, data_type_get_common_type_Test) -{ - try - { - auto & data_type_factory = DataTypeFactory::instance(); - auto typeFromString = [& data_type_factory](const std::string & str) - { - return data_type_factory.get(str); - }; - - auto typesFromString = [& typeFromString](const std::string & str) - { - std::istringstream data_types_stream(str); - DataTypes data_types; - std::string data_type; - while (data_types_stream >> data_type) - 
data_types.push_back(typeFromString(data_type)); - - return data_types; - }; - - ASSERT_TRUE(getLeastSupertype(typesFromString(""))->equals(*typeFromString("Nothing"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Nothing"))->equals(*typeFromString("Nothing"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 UInt8"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Int8 Int8"))->equals(*typeFromString("Int8"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 Int8"))->equals(*typeFromString("Int16"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 Int16"))->equals(*typeFromString("Int16"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 UInt32 UInt64"))->equals(*typeFromString("UInt64"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Int8 Int32 Int64"))->equals(*typeFromString("Int64"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 UInt32 Int64"))->equals(*typeFromString("Int64"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 Float64"))->equals(*typeFromString("Float64"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 UInt16 Int16"))->equals(*typeFromString("Float32"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 UInt16 Int32"))->equals(*typeFromString("Float64"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 Int16 UInt32"))->equals(*typeFromString("Float64"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("Date Date"))->equals(*typeFromString("Date"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Date DateTime"))->equals(*typeFromString("DateTime"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("String FixedString(32) FixedString(8)"))->equals(*typeFromString("String"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(UInt8) Array(UInt8)"))->equals(*typeFromString("Array(UInt8)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(UInt8) Array(Int8)"))->equals(*typeFromString("Array(Int16)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Float64)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(Int16))"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Date) Array(DateTime)"))->equals(*typeFromString("Array(DateTime)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(String)"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(Nothing) Nothing"))->equals(*typeFromString("Nullable(Nothing)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(UInt8) Int8"))->equals(*typeFromString("Nullable(Int16)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(Nothing) UInt8 Int8"))->equals(*typeFromString("Nullable(Int16)"))); - - ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(Int16,Int16)"))); - ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))"))->equals(*typeFromString("Tuple(Nullable(UInt8))"))); - - EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Int8 String"))); - 
EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Int64 UInt64"))); - EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Float32 UInt64"))); - EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Float64 Int64"))); - EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Tuple(Int64) Tuple(UInt64)"))); - EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Tuple(Int64, Int8) Tuple(UInt64)"))); - EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Array(Int64) Array(String)"))); - - - ASSERT_TRUE(getMostSubtype(typesFromString(""))->equals(*typeFromString("Nothing"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Nothing"))->equals(*typeFromString("Nothing"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("UInt8"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 UInt8"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Int8 Int8"))->equals(*typeFromString("Int8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 Int8"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Int8 UInt16"))->equals(*typeFromString("Int8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 UInt32 UInt64"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Int8 Int32 Int64"))->equals(*typeFromString("Int8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 Int64 UInt64"))->equals(*typeFromString("UInt8"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("Float32 Float64"))->equals(*typeFromString("Float32"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Float32 UInt16 Int16"))->equals(*typeFromString("UInt16"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Float32 UInt16 Int32"))->equals(*typeFromString("UInt16"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Float32 Int16 UInt32"))->equals(*typeFromString("Int16"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("DateTime DateTime"))->equals(*typeFromString("DateTime"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Date DateTime"))->equals(*typeFromString("Date"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("String FixedString(8)"))->equals(*typeFromString("FixedString(8)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("FixedString(16) FixedString(8)"))->equals(*typeFromString("Nothing"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("Array(UInt8) Array(UInt8)"))->equals(*typeFromString("Array(UInt8)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(UInt8) Array(Int8)"))->equals(*typeFromString("Array(UInt8)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Int16)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(UInt8))"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(Date) Array(DateTime)"))->equals(*typeFromString("Array(Date)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) Nothing"))->equals(*typeFromString("Nothing"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(UInt8) 
Int8"))->equals(*typeFromString("UInt8"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) UInt8 Int8"))->equals(*typeFromString("Nothing"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(UInt8) Nullable(Int8)"))->equals(*typeFromString("Nullable(UInt8)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) Nullable(Int8)"))->equals(*typeFromString("Nullable(Nothing)"))); - - ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(UInt8,UInt8)"))); - ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))"))->equals(*typeFromString("Tuple(Nullable(Nothing))"))); - - EXPECT_ANY_THROW(getMostSubtype(typesFromString("Int8 String"), true)); - EXPECT_ANY_THROW(getMostSubtype(typesFromString("Nothing"), true)); - EXPECT_ANY_THROW(getMostSubtype(typesFromString("FixedString(16) FixedString(8) String"), true)); - - } - catch (const Exception & e) - { - std::string text = e.displayText(); - - bool print_stack_trace = true; - - auto embedded_stack_trace_pos = text.find("Stack trace"); - if (std::string::npos != embedded_stack_trace_pos && !print_stack_trace) - text.resize(embedded_stack_trace_pos); - - std::cerr << "Code: " << e.code() << ". " << text << std::endl << std::endl; - - if (print_stack_trace && std::string::npos == embedded_stack_trace_pos) - { - std::cerr << "Stack trace:" << std::endl - << e.getStackTrace().toString(); - } - - throw; - } - catch (const Poco::Exception & e) - { - std::cerr << "Poco::Exception: " << e.displayText() << std::endl; - throw; - } - catch (const std::exception & e) - { - std::cerr << "std::exception: " << e.what() << std::endl; - throw; - } - catch (...) - { - std::cerr << "Unknown exception" << std::endl; - throw; - } -} diff --git a/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp new file mode 100644 index 00000000000..d2782ae9179 --- /dev/null +++ b/dbms/src/DataTypes/tests/gtest_data_type_get_common_type.cpp @@ -0,0 +1,127 @@ +#include +#include +#include + +#include + +#pragma GCC diagnostic ignored "-Wsign-compare" +#ifdef __clang__ + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" + #pragma clang diagnostic ignored "-Wundef" +#endif +#include + + +using namespace DB; + + +TEST(data_type, data_type_get_common_type_Test) +{ + auto & data_type_factory = DataTypeFactory::instance(); + auto typeFromString = [& data_type_factory](const std::string & str) + { + return data_type_factory.get(str); + }; + + auto typesFromString = [& typeFromString](const std::string & str) + { + std::istringstream data_types_stream(str); + DataTypes data_types; + std::string data_type; + while (data_types_stream >> data_type) + data_types.push_back(typeFromString(data_type)); + + return data_types; + }; + + ASSERT_TRUE(getLeastSupertype(typesFromString(""))->equals(*typeFromString("Nothing"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Nothing"))->equals(*typeFromString("Nothing"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 UInt8"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Int8 Int8"))->equals(*typeFromString("Int8"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 Int8"))->equals(*typeFromString("Int16"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 
Int16"))->equals(*typeFromString("Int16"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 UInt32 UInt64"))->equals(*typeFromString("UInt64"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Int8 Int32 Int64"))->equals(*typeFromString("Int64"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("UInt8 UInt32 Int64"))->equals(*typeFromString("Int64"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 Float64"))->equals(*typeFromString("Float64"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 UInt16 Int16"))->equals(*typeFromString("Float32"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 UInt16 Int32"))->equals(*typeFromString("Float64"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Float32 Int16 UInt32"))->equals(*typeFromString("Float64"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("Date Date"))->equals(*typeFromString("Date"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Date DateTime"))->equals(*typeFromString("DateTime"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("String FixedString(32) FixedString(8)"))->equals(*typeFromString("String"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(UInt8) Array(UInt8)"))->equals(*typeFromString("Array(UInt8)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(UInt8) Array(Int8)"))->equals(*typeFromString("Array(Int16)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Float64)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(Int16))"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(Date) Array(DateTime)"))->equals(*typeFromString("Array(DateTime)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(String)"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(Nothing) Nothing"))->equals(*typeFromString("Nullable(Nothing)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(UInt8) Int8"))->equals(*typeFromString("Nullable(Int16)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Nullable(Nothing) UInt8 Int8"))->equals(*typeFromString("Nullable(Int16)"))); + + ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(Int16,Int16)"))); + ASSERT_TRUE(getLeastSupertype(typesFromString("Tuple(Nullable(Nothing)) Tuple(Nullable(UInt8))"))->equals(*typeFromString("Tuple(Nullable(UInt8))"))); + + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Int8 String"))); + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Int64 UInt64"))); + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Float32 UInt64"))); + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Float64 Int64"))); + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Tuple(Int64) Tuple(UInt64)"))); + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Tuple(Int64, Int8) Tuple(UInt64)"))); + EXPECT_ANY_THROW(getLeastSupertype(typesFromString("Array(Int64) Array(String)"))); + + + ASSERT_TRUE(getMostSubtype(typesFromString(""))->equals(*typeFromString("Nothing"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Nothing"))->equals(*typeFromString("Nothing"))); + + 
ASSERT_TRUE(getMostSubtype(typesFromString("UInt8"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 UInt8"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Int8 Int8"))->equals(*typeFromString("Int8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 Int8"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Int8 UInt16"))->equals(*typeFromString("Int8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 UInt32 UInt64"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Int8 Int32 Int64"))->equals(*typeFromString("Int8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("UInt8 Int64 UInt64"))->equals(*typeFromString("UInt8"))); + + ASSERT_TRUE(getMostSubtype(typesFromString("Float32 Float64"))->equals(*typeFromString("Float32"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Float32 UInt16 Int16"))->equals(*typeFromString("UInt16"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Float32 UInt16 Int32"))->equals(*typeFromString("UInt16"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Float32 Int16 UInt32"))->equals(*typeFromString("Int16"))); + + ASSERT_TRUE(getMostSubtype(typesFromString("DateTime DateTime"))->equals(*typeFromString("DateTime"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Date DateTime"))->equals(*typeFromString("Date"))); + + ASSERT_TRUE(getMostSubtype(typesFromString("String FixedString(8)"))->equals(*typeFromString("FixedString(8)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("FixedString(16) FixedString(8)"))->equals(*typeFromString("Nothing"))); + + ASSERT_TRUE(getMostSubtype(typesFromString("Array(UInt8) Array(UInt8)"))->equals(*typeFromString("Array(UInt8)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(UInt8) Array(Int8)"))->equals(*typeFromString("Array(UInt8)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(Float32) Array(Int16) Array(UInt32)"))->equals(*typeFromString("Array(Int16)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(UInt8))"))->equals(*typeFromString("Array(Array(UInt8))"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(Array(UInt8)) Array(Array(Int8))"))->equals(*typeFromString("Array(Array(UInt8))"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(Date) Array(DateTime)"))->equals(*typeFromString("Array(Date)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Array(String) Array(FixedString(32))"))->equals(*typeFromString("Array(FixedString(32))"))); + + ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) Nothing"))->equals(*typeFromString("Nothing"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(UInt8) Int8"))->equals(*typeFromString("UInt8"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) UInt8 Int8"))->equals(*typeFromString("Nothing"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(UInt8) Nullable(Int8)"))->equals(*typeFromString("Nullable(UInt8)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Nullable(Nothing) Nullable(Int8)"))->equals(*typeFromString("Nullable(Nothing)"))); + + ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Int8,UInt8) Tuple(UInt8,Int8)"))->equals(*typeFromString("Tuple(UInt8,UInt8)"))); + ASSERT_TRUE(getMostSubtype(typesFromString("Tuple(Nullable(Nothing)) 
Tuple(Nullable(UInt8))"))->equals(*typeFromString("Tuple(Nullable(Nothing))"))); + + EXPECT_ANY_THROW(getMostSubtype(typesFromString("Int8 String"), true)); + EXPECT_ANY_THROW(getMostSubtype(typesFromString("Nothing"), true)); + EXPECT_ANY_THROW(getMostSubtype(typesFromString("FixedString(16) FixedString(8) String"), true)); +} diff --git a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp index cd609bfc70a..3703d11d832 100644 --- a/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/dbms/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -30,10 +30,8 @@ static ConnectionPoolWithFailoverPtr createPool( bool secure, const std::string & db, const std::string & user, - const std::string & password, - const Context & context) + const std::string & password) { - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(context.getSettingsRef()); ConnectionPoolPtrs pools; pools.emplace_back(std::make_shared( MAX_CONNECTIONS, @@ -42,7 +40,6 @@ static ConnectionPoolWithFailoverPtr createPool( db, user, password, - timeouts, "ClickHouseDictionarySource", Protocol::Compression::Enable, secure ? Protocol::Secure::Enable : Protocol::Secure::Disable)); @@ -72,7 +69,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource( , sample_block{sample_block} , context(context_) , is_local{isLocalAddress({host, port}, context.getTCPPort())} - , pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)} + , pool{is_local ? nullptr : createPool(host, port, secure, db, user, password)} , load_all_query{query_builder.composeLoadAllQuery()} { /// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication). @@ -98,7 +95,7 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar , sample_block{other.sample_block} , context(other.context) , is_local{other.is_local} - , pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)} + , pool{is_local ? 
nullptr : createPool(host, port, secure, db, user, password)} , load_all_query{other.load_all_query} { } @@ -179,6 +176,7 @@ BlockInputStreamPtr ClickHouseDictionarySource::createStreamForSelectiveLoad(con { if (is_local) return executeQuery(query, context, true).in; + return std::make_shared(pool, query, sample_block, context); } diff --git a/dbms/src/Formats/CapnProtoRowInputStream.h b/dbms/src/Formats/CapnProtoRowInputStream.h index 02548b5a0fc..6e2ccd6042b 100644 --- a/dbms/src/Formats/CapnProtoRowInputStream.h +++ b/dbms/src/Formats/CapnProtoRowInputStream.h @@ -48,9 +48,9 @@ private: struct Action { enum Type { POP, PUSH, READ }; - Type type; - capnp::StructSchema::Field field = {}; - BlockPositionList columns = {}; + Type type{}; + capnp::StructSchema::Field field{}; + BlockPositionList columns{}; }; // Wrapper for classes that could throw in destructor diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index e204ae463fd..d8b8775a7a0 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -394,10 +394,10 @@ inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb template bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { - if constexpr (std::is_integral_v) - return tryReadIntText(x, rb); - else if constexpr (std::is_floating_point_v) + if constexpr (std::is_floating_point_v) return tryReadFloatText(x, rb); + else /*if constexpr (std::is_integral_v)*/ + return tryReadIntText(x, rb); } template <> diff --git a/dbms/src/Functions/SimdJSONParser.h b/dbms/src/Functions/SimdJSONParser.h index 7965507c529..c8e71491ae1 100644 --- a/dbms/src/Functions/SimdJSONParser.h +++ b/dbms/src/Functions/SimdJSONParser.h @@ -95,7 +95,7 @@ struct SimdJSONParser { do ++size; - while (it2.next() && it2.next()); + while (it2.next() && it2.next()); //-V501 } return size; } @@ -116,13 +116,13 @@ struct SimdJSONParser if (!it.down()) return false; while (index--) - if (!it.next() || !it.next()) + if (!it.next() || !it.next()) //-V501 return false; return it.next(); } static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.move_to_key(name.data); } - static bool nextObjectMember(Iterator & it) { return it.next() && it.next(); } + static bool nextObjectMember(Iterator & it) { return it.next() && it.next(); } //-V501 static bool nextObjectMember(Iterator & it, StringRef & next_key) { diff --git a/dbms/src/IO/ReadBufferFromFileBase.h b/dbms/src/IO/ReadBufferFromFileBase.h index 1fddd0c87cb..755f0269664 100644 --- a/dbms/src/IO/ReadBufferFromFileBase.h +++ b/dbms/src/IO/ReadBufferFromFileBase.h @@ -41,7 +41,7 @@ public: protected: ProfileCallback profile_callback; - clockid_t clock_type; + clockid_t clock_type{}; /// Children implementation should be able to seek backwards virtual off_t doSeek(off_t off, int whence) = 0; diff --git a/dbms/src/IO/WriteBufferFromOStream.h b/dbms/src/IO/WriteBufferFromOStream.h index 6a323be34d7..26345428ae2 100644 --- a/dbms/src/IO/WriteBufferFromOStream.h +++ b/dbms/src/IO/WriteBufferFromOStream.h @@ -12,7 +12,7 @@ namespace DB class WriteBufferFromOStream : public BufferWithOwnMemory { protected: - std::ostream * ostr; + std::ostream * ostr{}; void nextImpl() override; diff --git a/dbms/src/IO/readFloatText.h b/dbms/src/IO/readFloatText.h index 3e6ef1b1822..5e853c1c920 100644 --- a/dbms/src/IO/readFloatText.h +++ b/dbms/src/IO/readFloatText.h @@ -312,7 +312,7 @@ template ReturnType readFloatTextFastImpl(T & x, 
ReadBuffer & in) { static_assert(std::is_same_v || std::is_same_v, "Argument for readFloatTextImpl must be float or double"); - static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V501 + static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); //-V590 static constexpr bool throw_exception = std::is_same_v; diff --git a/dbms/src/Interpreters/ActionLocksManager.cpp b/dbms/src/Interpreters/ActionLocksManager.cpp index 6fa44741925..1f9329f85a9 100644 --- a/dbms/src/Interpreters/ActionLocksManager.cpp +++ b/dbms/src/Interpreters/ActionLocksManager.cpp @@ -13,6 +13,7 @@ namespace ActionLocks extern const StorageActionBlockType PartsFetch = 2; extern const StorageActionBlockType PartsSend = 3; extern const StorageActionBlockType ReplicationQueue = 4; + extern const StorageActionBlockType DistributedSend = 5; } diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index e347265d774..d550151a645 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -250,11 +250,10 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting settings.distributed_connections_pool_size, address.host_name, address.port, address.default_database, address.user, address.password, - ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings).getSaturated(settings.max_execution_time), "server", address.compression, address.secure); info.pool = std::make_shared( - ConnectionPoolPtrs{pool}, settings.load_balancing, settings.connections_with_failover_max_tries); + ConnectionPoolPtrs{pool}, settings.load_balancing); info.per_replica_pools = {std::move(pool)}; if (weight) @@ -322,7 +321,6 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting settings.distributed_connections_pool_size, replica.host_name, replica.port, replica.default_database, replica.user, replica.password, - ConnectionTimeouts::getTCPTimeoutsWithFailover(settings).getSaturated(settings.max_execution_time), "server", replica.compression, replica.secure); all_replicas_pools.emplace_back(replica_pool); @@ -331,7 +329,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, const Setting } ConnectionPoolWithFailoverPtr shard_pool = std::make_shared( - all_replicas_pools, settings.load_balancing, settings.connections_with_failover_max_tries); + all_replicas_pools, settings.load_balancing); if (weight) slot_to_shard.insert(std::end(slot_to_shard), weight, shards_info.size()); @@ -375,7 +373,6 @@ Cluster::Cluster(const Settings & settings, const std::vector( - all_replicas, settings.load_balancing, settings.connections_with_failover_max_tries); + all_replicas, settings.load_balancing); slot_to_shard.insert(std::end(slot_to_shard), default_weight, shards_info.size()); shards_info.push_back({{}, current_shard_num, default_weight, std::move(shard_local_addresses), std::move(shard_pool), diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 0bf593a6078..ce16a431b37 100644 --- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -184,13 +184,17 @@ void SelectStreamFactory::createForShard( local_delay]() -> BlockInputStreamPtr { + auto current_settings = context.getSettingsRef(); + auto timeouts = 
ConnectionTimeouts::getTCPTimeoutsWithFailover(
+                current_settings).getSaturated(
+                    current_settings.max_execution_time);
             std::vector<ConnectionPoolWithFailover::TryResult> try_results;
             try
             {
                 if (table_func_ptr)
-                    try_results = pool->getManyForTableFunction(&context.getSettingsRef(), PoolMode::GET_MANY);
+                    try_results = pool->getManyForTableFunction(timeouts, &current_settings, PoolMode::GET_MANY);
                 else
-                    try_results = pool->getManyChecked(&context.getSettingsRef(), PoolMode::GET_MANY, main_table);
+                    try_results = pool->getManyChecked(timeouts, &current_settings, PoolMode::GET_MANY, main_table);
             }
             catch (const Exception & ex)
             {
diff --git a/dbms/src/Interpreters/ExpressionActions.h b/dbms/src/Interpreters/ExpressionActions.h
index 3c244eeff19..156a56da866 100644
--- a/dbms/src/Interpreters/ExpressionActions.h
+++ b/dbms/src/Interpreters/ExpressionActions.h
@@ -69,7 +69,7 @@ public:
         ADD_ALIASES,
     };
 
-    Type type;
+    Type type{};
 
     /// For ADD/REMOVE/COPY_COLUMN.
     std::string source_name;
diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.cpp b/dbms/src/Interpreters/InterpreterSystemQuery.cpp
index 15d15fc4f14..3b00e20ee4c 100644
--- a/dbms/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSystemQuery.cpp
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+#include <Storages/StorageDistributed.h>
 #include
 #include
 #include
@@ -22,6 +23,7 @@
 #include
 #include
+#include "InterpreterSystemQuery.h"
 
 
 namespace DB
@@ -42,6 +44,7 @@ namespace ActionLocks
     extern StorageActionBlockType PartsFetch;
     extern StorageActionBlockType PartsSend;
     extern StorageActionBlockType ReplicationQueue;
+    extern StorageActionBlockType DistributedSend;
 }
 
@@ -194,9 +197,18 @@ BlockIO InterpreterSystemQuery::execute()
         case Type::START_REPLICATION_QUEUES:
             startStopAction(context, query, ActionLocks::ReplicationQueue, true);
             break;
+        case Type::STOP_DISTRIBUTED_SENDS:
+            startStopAction(context, query, ActionLocks::DistributedSend, false);
+            break;
+        case Type::START_DISTRIBUTED_SENDS:
+            startStopAction(context, query, ActionLocks::DistributedSend, true);
+            break;
         case Type::SYNC_REPLICA:
             syncReplica(query);
             break;
+        case Type::FLUSH_DISTRIBUTED:
+            flushDistributed(query);
+            break;
         case Type::RESTART_REPLICAS:
             restartReplicas(system_context);
             break;
@@ -303,11 +315,21 @@ void InterpreterSystemQuery::syncReplica(ASTSystemQuery & query)
 
     StoragePtr table = context.getTable(database_name, table_name);
 
-    auto table_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());
-    if (!table_replicated)
+    if (auto storage_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get()))
+        storage_replicated->waitForShrinkingQueueSize(0, context.getSettingsRef().receive_timeout.value.milliseconds());
+    else
         throw Exception("Table " + database_name + "." + table_name + " is not replicated", ErrorCodes::BAD_ARGUMENTS);
+}
 
-    table_replicated->waitForShrinkingQueueSize(0, context.getSettingsRef().receive_timeout.value.milliseconds());
+void InterpreterSystemQuery::flushDistributed(ASTSystemQuery & query)
+{
+    String database_name = !query.target_database.empty() ? query.target_database : context.getCurrentDatabase();
+    String & table_name = query.target_table;
+
+    if (auto storage_distributed = dynamic_cast<StorageDistributed *>(context.getTable(database_name, table_name).get()))
+        storage_distributed->flushClusterNodesAllData();
+    else
+        throw Exception("Table " + database_name + "." + table_name + " is not distributed", ErrorCodes::BAD_ARGUMENTS);
+}
 }
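Together with the parser changes further below, these handlers back three new statements. Their expected shape (table name hypothetical) follows directly from ASTSystemQuery::typeToString and ParserSystemQuery in this diff:

SYSTEM STOP DISTRIBUTED SENDS db.distributed_table
SYSTEM FLUSH DISTRIBUTED db.distributed_table
SYSTEM START DISTRIBUTED SENDS db.distributed_table

STOP/START toggle the DistributedSend action lock (the table name is optional for them, per ParserSystemQuery below), while FLUSH DISTRIBUTED synchronously pushes all pending data to the cluster nodes.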
diff --git a/dbms/src/Interpreters/InterpreterSystemQuery.h b/dbms/src/Interpreters/InterpreterSystemQuery.h
index 65a6b13884b..31945745c1e 100644
--- a/dbms/src/Interpreters/InterpreterSystemQuery.h
+++ b/dbms/src/Interpreters/InterpreterSystemQuery.h
@@ -31,6 +31,7 @@ private:
 
     void restartReplicas(Context & system_context);
     void syncReplica(ASTSystemQuery & query);
+    void flushDistributed(ASTSystemQuery & query);
 };
diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp
index 3d21f85d408..71a49b4c106 100644
--- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp
+++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp
@@ -474,6 +474,14 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
     }
 }
 
+
+[[noreturn]] static void throwSyntaxException(const String & msg)
+{
+    throw Exception("Invalid expression for JOIN ON. " + msg + " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
+                    "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]", ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
+}
+
+
 /// Parse JOIN ON expression and collect ASTs for joined columns.
 void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTTableJoin & table_join)
 {
@@ -528,13 +536,6 @@
         return table_belonging;
     };
 
-    const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) "
-        "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]";
-    auto throwSyntaxException = [&](const String & msg)
-    {
-        throw Exception("Invalid expression for JOIN ON. " + msg + supported_syntax, ErrorCodes::INVALID_JOIN_ON_EXPRESSION);
-    };
-
     /// For equal expression find out corresponding table for each part, translate qualified names and add asts to join keys.
auto add_columns_from_equals_expr = [&](const ASTPtr & expr) { diff --git a/dbms/src/Interpreters/tests/hash_map_string_small.cpp b/dbms/src/Interpreters/tests/hash_map_string_small.cpp index 0fa8854fa8a..730d88f0a12 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_small.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_small.cpp @@ -23,7 +23,7 @@ struct SmallStringRef { - UInt32 size; + UInt32 size = 0; union { diff --git a/dbms/src/Parsers/ASTSystemQuery.cpp b/dbms/src/Parsers/ASTSystemQuery.cpp index 1f49453df48..699dd9d0f54 100644 --- a/dbms/src/Parsers/ASTSystemQuery.cpp +++ b/dbms/src/Parsers/ASTSystemQuery.cpp @@ -41,6 +41,8 @@ const char * ASTSystemQuery::typeToString(Type type) return "RESTART REPLICA"; case Type::SYNC_REPLICA: return "SYNC REPLICA"; + case Type::FLUSH_DISTRIBUTED: + return "FLUSH DISTRIBUTED"; case Type::RELOAD_DICTIONARY: return "RELOAD DICTIONARY"; case Type::RELOAD_DICTIONARIES: @@ -65,6 +67,10 @@ const char * ASTSystemQuery::typeToString(Type type) return "STOP REPLICATION QUEUES"; case Type::START_REPLICATION_QUEUES: return "START REPLICATION QUEUES"; + case Type::STOP_DISTRIBUTED_SENDS: + return "STOP DISTRIBUTED SENDS"; + case Type::START_DISTRIBUTED_SENDS: + return "START DISTRIBUTED SENDS"; case Type::FLUSH_LOGS: return "FLUSH LOGS"; default: @@ -99,12 +105,14 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, || type == Type::STOP_REPLICATED_SENDS || type == Type::START_REPLICATED_SENDS || type == Type::STOP_REPLICATION_QUEUES - || type == Type::START_REPLICATION_QUEUES) + || type == Type::START_REPLICATION_QUEUES + || type == Type::STOP_DISTRIBUTED_SENDS + || type == Type::START_DISTRIBUTED_SENDS) { if (!target_table.empty()) print_database_table(); } - else if (type == Type::RESTART_REPLICA || type == Type::SYNC_REPLICA) + else if (type == Type::RESTART_REPLICA || type == Type::SYNC_REPLICA || type == Type::FLUSH_DISTRIBUTED) { print_database_table(); } diff --git a/dbms/src/Parsers/ASTSystemQuery.h b/dbms/src/Parsers/ASTSystemQuery.h index d32a5dd08da..0ff8228e2e0 100644 --- a/dbms/src/Parsers/ASTSystemQuery.h +++ b/dbms/src/Parsers/ASTSystemQuery.h @@ -40,6 +40,9 @@ public: STOP_REPLICATION_QUEUES, START_REPLICATION_QUEUES, FLUSH_LOGS, + FLUSH_DISTRIBUTED, + STOP_DISTRIBUTED_SENDS, + START_DISTRIBUTED_SENDS, END }; diff --git a/dbms/src/Parsers/ParserSystemQuery.cpp b/dbms/src/Parsers/ParserSystemQuery.cpp index e3431c50be5..333613e9512 100644 --- a/dbms/src/Parsers/ParserSystemQuery.cpp +++ b/dbms/src/Parsers/ParserSystemQuery.cpp @@ -49,6 +49,7 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::RESTART_REPLICA: case Type::SYNC_REPLICA: + case Type::FLUSH_DISTRIBUTED: if (!parseDatabaseAndTableName(pos, expected, res->target_database, res->target_table)) return false; break; @@ -61,6 +62,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: + case Type::STOP_DISTRIBUTED_SENDS: + case Type::START_DISTRIBUTED_SENDS: parseDatabaseAndTableName(pos, expected, res->target_database, res->target_table); break; diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp index 2500b519b23..ff780f6f7b7 100644 --- a/dbms/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/dbms/src/Storages/Distributed/DirectoryMonitor.cpp @@ -11,6 +11,7 @@ #include #include #include +#include 
#include #include @@ -29,6 +30,7 @@ namespace DB namespace ErrorCodes { + extern const int ABORTED; extern const int INCORRECT_FILE_NAME; extern const int CHECKSUM_DOESNT_MATCH; extern const int TOO_LARGE_SIZE_COMPRESSED; @@ -57,12 +59,14 @@ namespace } -StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, const ConnectionPoolPtr & pool) +StorageDistributedDirectoryMonitor::StorageDistributedDirectoryMonitor( + StorageDistributed & storage, const std::string & name, const ConnectionPoolPtr & pool, ActionBlocker & monitor_blocker) : storage(storage), pool{pool}, path{storage.path + name + '/'} , current_batch_file_path{path + "current_batch.txt"} , default_sleep_time{storage.global_context.getSettingsRef().distributed_directory_monitor_sleep_time_ms.totalMilliseconds()} , sleep_time{default_sleep_time} , log{&Logger::get(getLoggerName())} + , monitor_blocker(monitor_blocker) { const Settings & settings = storage.global_context.getSettingsRef(); should_batch_inserts = settings.distributed_directory_monitor_batch_inserts; @@ -84,6 +88,14 @@ StorageDistributedDirectoryMonitor::~StorageDistributedDirectoryMonitor() } } +void StorageDistributedDirectoryMonitor::flushAllData() +{ + if (!quit) + { + std::unique_lock lock{mutex}; + processFiles(); + } +} void StorageDistributedDirectoryMonitor::shutdownAndDropAllData() { @@ -113,18 +125,25 @@ void StorageDistributedDirectoryMonitor::run() { auto do_sleep = true; - try + if (!monitor_blocker.isCancelled()) { - do_sleep = !findFiles(); + try + { + do_sleep = !processFiles(); + } + catch (...) + { + do_sleep = true; + ++error_count; + sleep_time = std::min( + std::chrono::milliseconds{Int64(default_sleep_time.count() * std::exp2(error_count))}, + std::chrono::milliseconds{max_sleep_time}); + tryLogCurrentException(getLoggerName().data()); + } } - catch (...) 
+        else
+        {
-            do_sleep = true;
-            ++error_count;
-            sleep_time = std::min(
-                std::chrono::milliseconds{Int64(default_sleep_time.count() * std::exp2(error_count))},
-                std::chrono::milliseconds{max_sleep_time});
-            tryLogCurrentException(getLoggerName().data());
+            LOG_DEBUG(log, "Skipping sending data over distributed table.");
+        }
 
         if (do_sleep)
@@ -142,8 +161,7 @@ void StorageDistributedDirectoryMonitor::run()
 
 ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::string & name, const StorageDistributed & storage)
 {
-    auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context.getSettingsRef());
-    const auto pool_factory = [&storage, &timeouts] (const Cluster::Address & address) -> ConnectionPoolPtr
+    const auto pool_factory = [&storage] (const Cluster::Address & address) -> ConnectionPoolPtr
     {
         const auto & cluster = storage.getCluster();
         const auto & shards_info = cluster->getShardsInfo();
@@ -164,7 +182,7 @@
         }
 
         return std::make_shared<ConnectionPool>(
-            1, address.host_name, address.port, address.default_database, address.user, address.password, timeouts,
+            1, address.host_name, address.port, address.default_database, address.user, address.password,
             storage.getName() + '_' + address.user, Protocol::Compression::Enable, address.secure);
     };
 
@@ -174,7 +192,7 @@
 }
 
-bool StorageDistributedDirectoryMonitor::findFiles()
+bool StorageDistributedDirectoryMonitor::processFiles()
 {
     std::map<UInt64, std::string> files;
 
@@ -212,7 +230,8 @@
 void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path)
 {
     LOG_TRACE(log, "Started processing `" << file_path << '`');
-    auto connection = pool->get();
+    auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context.getSettingsRef());
+    auto connection = pool->get(timeouts);
 
     try
     {
@@ -224,7 +243,7 @@
         std::string insert_query;
         readQueryAndSettings(in, insert_settings, insert_query);
 
-        RemoteBlockOutputStream remote{*connection, insert_query, &insert_settings};
+        RemoteBlockOutputStream remote{*connection, timeouts, insert_query, &insert_settings};
 
         remote.writePrefix();
         remote.writePrepared(in);
@@ -334,8 +353,8 @@ struct StorageDistributedDirectoryMonitor::Batch
             WriteBufferFromFile out{parent.current_batch_file_path};
             writeText(out);
         }
-
-        auto connection = parent.pool->get();
+        auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(parent.storage.global_context.getSettingsRef());
+        auto connection = parent.pool->get(timeouts);
 
         bool batch_broken = false;
         try
@@ -361,7 +380,7 @@
                 if (first)
                 {
                     first = false;
-                    remote = std::make_unique<RemoteBlockOutputStream>(*connection, insert_query, &insert_settings);
+                    remote = std::make_unique<RemoteBlockOutputStream>(*connection, timeouts, insert_query, &insert_settings);
                     remote->writePrefix();
                 }
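run() now consults monitor_blocker before touching any files. The blocker pattern amounts to a shared counter of active "stop" holders; a simplified sketch of the idea (assumed semantics, not ClickHouse's actual ActionBlocker implementation):

#include <atomic>

/// While at least one Lock obtained via cancel() is alive, isCancelled() is
/// true and the monitor skips processFiles(); destroying the last Lock
/// (SYSTEM START DISTRIBUTED SENDS) re-enables sending.
class SimpleActionBlocker
{
public:
    class Lock
    {
    public:
        explicit Lock(std::atomic<int> & counter_) : counter(&counter_) { ++*counter; }
        Lock(Lock && other) noexcept : counter(other.counter) { other.counter = nullptr; }
        Lock(const Lock &) = delete;
        ~Lock() { if (counter) --*counter; }
    private:
        std::atomic<int> * counter;
    };

    Lock cancel() { return Lock(counter); }                   /// SYSTEM STOP ... holds a lock
    bool isCancelled() const { return counter.load() > 0; }   /// polled by run()

private:
    std::atomic<int> counter{0};
};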
diff --git a/dbms/src/Storages/Distributed/DirectoryMonitor.h b/dbms/src/Storages/Distributed/DirectoryMonitor.h
index 2c95947355d..9416db9be2c 100644
--- a/dbms/src/Storages/Distributed/DirectoryMonitor.h
+++ b/dbms/src/Storages/Distributed/DirectoryMonitor.h
@@ -19,15 +19,19 @@ namespace DB
 class StorageDistributedDirectoryMonitor
 {
 public:
-    StorageDistributedDirectoryMonitor(StorageDistributed & storage, const std::string & name, const ConnectionPoolPtr & pool);
+    StorageDistributedDirectoryMonitor(
+        StorageDistributed & storage, const std::string & name, const ConnectionPoolPtr & pool, ActionBlocker & monitor_blocker);
+
     ~StorageDistributedDirectoryMonitor();
 
     static ConnectionPoolPtr createPool(const std::string & name, const StorageDistributed & storage);
 
+    void flushAllData();
+
     void shutdownAndDropAllData();
 
 private:
     void run();
-    bool findFiles();
+    bool processFiles();
     void processFile(const std::string & file_path);
     void processFilesWithBatching(const std::map<UInt64, std::string> & files);
 
@@ -57,6 +61,7 @@ private:
     std::mutex mutex;
     std::condition_variable cond;
     Logger * log;
+    ActionBlocker & monitor_blocker;
     ThreadFromGlobalPool thread{&StorageDistributedDirectoryMonitor::run, this};
 
     /// Read insert query and insert settings for backward compatible.
diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
index 836ba20a644..da374b1b65d 100644
--- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
+++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp
@@ -242,6 +242,8 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
     {
         if (!job.stream)
         {
+            const Settings & settings = context.getSettingsRef();
+            auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings);
             if (shard_info.hasInternalReplication())
             {
                 /// Skip replica_index in case of internal replication
@@ -249,7 +251,7 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp
                     throw Exception("There are several writing job for an automatically replicated shard", ErrorCodes::LOGICAL_ERROR);
 
                 /// TODO: it make sense to rewrite skip_unavailable_shards and max_parallel_replicas here
-                auto connections = shard_info.pool->getMany(&context.getSettingsRef(), PoolMode::GET_ONE);
+                auto connections = shard_info.pool->getMany(timeouts, &settings, PoolMode::GET_ONE);
                 if (connections.empty() || connections.front().isNull())
                     throw Exception("Expected exactly one connection for shard " + toString(job.shard_index), ErrorCodes::LOGICAL_ERROR);
 
@@ -263,7 +265,7 @@
                 if (!connection_pool)
                     throw Exception("Connection pool for replica " + replica.readableString() + " does not exist", ErrorCodes::LOGICAL_ERROR);
 
-                job.connection_entry = connection_pool->get(&context.getSettingsRef());
+                job.connection_entry = connection_pool->get(timeouts, &settings);
                 if (job.connection_entry.isNull())
                     throw Exception("Got empty connection for replica" + replica.readableString(), ErrorCodes::LOGICAL_ERROR);
             }
@@ -271,7 +273,7 @@
             if (throttler)
                 job.connection_entry->setThrottler(throttler);
 
-            job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, query_string, &context.getSettingsRef());
+            job.stream = std::make_shared<RemoteBlockOutputStream>(*job.connection_entry, timeouts, query_string, &settings);
             job.stream->writePrefix();
         }
diff --git a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
index d3729bb0d08..96e1ba1429f 100644
--- a/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
+++ b/dbms/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp
@@ -20,7 +20,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const MergeTreeData
         const auto & column_name = columns[i];
 
         /// column has files and hence does not require evaluation
-        if 
(part->hasColumnFiles(column_name)) + if (part->hasColumnFiles(column_name, *storage.getColumn(column_name).type)) { all_column_files_missing = false; continue; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index e6c874fdeaf..f1a02feb5e7 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1112,6 +1112,10 @@ void MergeTreeData::dropAllData() LOG_TRACE(log, "dropAllData: removing data from filesystem."); + /// Remove each data part before removing the directory recursively: this speeds up removal because it needs fewer syscalls. + for (DataPartPtr part : data_parts_by_info) /// a copy is intended + part->remove(); + Poco::File(full_path).remove(true); LOG_TRACE(log, "dropAllData: done."); @@ -1323,7 +1327,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name if (!new_types.count(column.name)) { /// The column was deleted. - if (!part || part->hasColumnFiles(column.name)) + if (!part || part->hasColumnFiles(column.name, *column.type)) { column.type->enumerateStreams([&](const IDataType::SubstreamPath & substream_path) { @@ -1345,7 +1349,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name const String new_type_name = new_type->getName(); const auto * old_type = column.type.get(); - if (!new_type->equals(*old_type) && (!part || part->hasColumnFiles(column.name))) + if (!new_type->equals(*old_type) && (!part || part->hasColumnFiles(column.name, *column.type))) { if (isMetadataOnlyConversion(old_type, new_type)) { diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp index 0b3062b077e..9a9a8d9a536 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int NOT_FOUND_EXPECTED_DATA_PART; extern const int BAD_SIZE_OF_FILE_IN_DATA_PART; extern const int BAD_TTL_FILE; + extern const int CANNOT_UNLINK; } @@ -216,7 +217,7 @@ String MergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const for (const auto & column : storage_columns) { - if (!hasColumnFiles(column.name)) + if (!hasColumnFiles(column.name, *column.type)) continue; const auto size = getColumnSize(column.name, *column.type).data_compressed; @@ -395,7 +396,36 @@ void MergeTreeDataPart::remove() const return; } - to_dir.remove(true); + try + { + /// Remove each expected file in directory, then remove directory itself. + + for (const auto & [file, _] : checksums.files) + { + String path_to_remove = to + "/" + file; + if (0 != unlink(path_to_remove.c_str())) + throwFromErrno("Cannot unlink file " + path_to_remove, ErrorCodes::CANNOT_UNLINK); + } + + for (const auto & file : {"checksums.txt", "columns.txt"}) + { + String path_to_remove = to + "/" + file; + if (0 != unlink(path_to_remove.c_str())) + throwFromErrno("Cannot unlink file " + path_to_remove, ErrorCodes::CANNOT_UNLINK); + } + + if (0 != rmdir(to.c_str())) + throwFromErrno("Cannot rmdir " + to, ErrorCodes::CANNOT_UNLINK); + } + catch (...) + { + /// Recursive directory removal does many excessive "stat" syscalls under the hood. + + LOG_ERROR(storage.log, "Cannot quickly remove directory " << to << " by removing files; fallback to recursive removal. 
Reason: " + << getCurrentExceptionMessage(false)); + + to_dir.remove(true); + } } @@ -858,16 +888,22 @@ void MergeTreeDataPart::checkConsistency(bool require_part_metadata) } } -bool MergeTreeDataPart::hasColumnFiles(const String & column) const +bool MergeTreeDataPart::hasColumnFiles(const String & column_name, const IDataType & type) const { - /// NOTE: For multi-streams columns we check that just first file exist. - /// That's Ok under assumption that files exist either for all or for no streams. + bool res = true; - String prefix = getFullPath(); + type.enumerateStreams([&](const IDataType::SubstreamPath & substream_path) + { + String file_name = IDataType::getFileNameForStream(column_name, substream_path); - String escaped_column = escapeForFileName(column); - return Poco::File(prefix + escaped_column + ".bin").exists() - && Poco::File(prefix + escaped_column + storage.index_granularity_info.marks_file_extension).exists(); + auto bin_checksum = checksums.files.find(file_name + ".bin"); + auto mrk_checksum = checksums.files.find(file_name + storage.index_granularity_info.marks_file_extension); + + if (bin_checksum == checksums.files.end() || mrk_checksum == checksums.files.end()) + res = false; + }, {}); + + return res; } diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h index 8ca5907c487..921fea6ccaa 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataPart.h +++ b/dbms/src/Storages/MergeTree/MergeTreeDataPart.h @@ -274,7 +274,7 @@ struct MergeTreeDataPart void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency); /// Checks that .bin and .mrk files exist - bool hasColumnFiles(const String & column) const; + bool hasColumnFiles(const String & column, const IDataType & type) const; /// For data in RAM ('index') UInt64 getIndexSizeInBytes() const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp index 85d33896967..4cbf28c83c2 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -129,12 +129,14 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read, prewhere_info && prewhere_info->remove_prewhere_column, per_part_should_reorder[part_idx], std::move(curr_task_size_predictor)); } -MarkRanges MergeTreeReadPool::getRestMarks(const std::string & part_path, const MarkRange & from) const +MarkRanges MergeTreeReadPool::getRestMarks(const MergeTreeDataPart & part, const MarkRange & from) const { MarkRanges all_part_ranges; + + /// Inefficient in presence of large number of data parts. 
for (const auto & part_ranges : parts_ranges) { - if (part_ranges.data_part->getFullPath() == part_path) + if (part_ranges.data_part.get() == &part) { all_part_ranges = part_ranges.ranges; break; @@ -142,7 +144,7 @@ MarkRanges MergeTreeReadPool::getRestMarks(const std::string & part_path, const } if (all_part_ranges.empty()) throw Exception("Trying to read marks range [" + std::to_string(from.begin) + ", " + std::to_string(from.end) + "] from part '" - + part_path + "' which has no ranges in this query", ErrorCodes::LOGICAL_ERROR); + + part.getFullPath() + "' which has no ranges in this query", ErrorCodes::LOGICAL_ERROR); auto begin = std::lower_bound(all_part_ranges.begin(), all_part_ranges.end(), from, [] (const auto & f, const auto & s) { return f.begin < s.begin; }); if (begin == all_part_ranges.end()) diff --git a/dbms/src/Storages/MergeTree/MergeTreeReadPool.h b/dbms/src/Storages/MergeTree/MergeTreeReadPool.h index ab2280d6cef..e7c8cb7c7da 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/dbms/src/Storages/MergeTree/MergeTreeReadPool.h @@ -81,7 +81,7 @@ public: void profileFeedback(const ReadBufferFromFileBase::ProfileInfo info); /// This method tells which mark ranges we have to read if we start from @from mark range - MarkRanges getRestMarks(const std::string & part_path, const MarkRange & from) const; + MarkRanges getRestMarks(const MergeTreeDataPart & part, const MarkRange & from) const; Block getHeader() const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp index d155077d024..fb5b341df1d 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeReader.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeReader.cpp @@ -44,9 +44,6 @@ MergeTreeReader::MergeTreeReader(const String & path, { try { - if (!Poco::File(path).exists()) - throw Exception("Part " + path + " is missing", ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART); - for (const NameAndTypePair & column : columns) addStreams(column.name, *column.type, profile_callback, clock_type); } @@ -163,7 +160,7 @@ void MergeTreeReader::addStreams(const String & name, const IDataType & type, if (streams.count(stream_name)) return; - bool data_file_exists = Poco::File(path + stream_name + DATA_FILE_EXTENSION).exists(); + bool data_file_exists = data_part->checksums.files.count(stream_name + DATA_FILE_EXTENSION); /** If data file is missing then we will not try to open it. * It is necessary since it allows to add new column to structure of the table without creating new files for old parts. 
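The `MergeTreeReader` and `hasColumnFiles` changes above replace per-file `exists()` checks with lookups in the part's in-memory checksums map, trading filesystem syscalls for a hash lookup. A minimal sketch of that lookup, assuming a simplified map of file name to size:

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

int main()
{
    // Stand-in for the per-part checksums loaded from checksums.txt:
    // file name -> size. Membership in this map tells us a stream exists,
    // with no stat()/exists() call against the filesystem.
    std::unordered_map<std::string, size_t> checksum_files{
        {"id.bin", 1024}, {"id.mrk", 48},
        {"val.bin", 4096}, {"val.mrk", 48},
    };

    auto has_stream = [&](const std::string & stream_name)
    {
        return checksum_files.count(stream_name + ".bin")
            && checksum_files.count(stream_name + ".mrk");
    };

    std::cout << std::boolalpha
              << has_stream("id") << '\n'       // true
              << has_stream("missing") << '\n'; // false
}
```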
diff --git a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp index b34cca2c677..170a603ae23 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputStream.cpp @@ -74,7 +74,7 @@ bool MergeTreeThreadSelectBlockInputStream::getNewTask() if (!reader) { - auto rest_mark_ranges = pool->getRestMarks(path, task->mark_ranges[0]); + auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); if (use_uncompressed_cache) owned_uncompressed_cache = storage.global_context.getUncompressedCache(); @@ -95,7 +95,7 @@ bool MergeTreeThreadSelectBlockInputStream::getNewTask() /// in other case we can reuse readers, anyway they will be "seeked" to required mark if (path != last_readed_part_path) { - auto rest_mark_ranges = pool->getRestMarks(path, task->mark_ranges[0]); + auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); /// retain avg_value_size_hints reader = std::make_unique<MergeTreeReader>( path, task->data_part, task->columns, owned_uncompressed_cache.get(), owned_mark_cache.get(), save_marks_in_cache, diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h index c44f0b78f84..e3bb658b2d0 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h @@ -20,7 +20,7 @@ namespace DB struct ReplicatedMergeTreeQuorumEntry { String part_name; - size_t required_number_of_replicas; + size_t required_number_of_replicas{}; std::set<String> replicas; ReplicatedMergeTreeQuorumEntry() {} diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index 4440d2b96ee..826407e0599 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -65,6 +65,10 @@ namespace ErrorCodes extern const int TOO_MANY_ROWS; } +namespace ActionLocks +{ + extern const StorageActionBlockType DistributedSend; +} namespace { @@ -427,7 +431,7 @@ void StorageDistributed::createDirectoryMonitors() void StorageDistributed::requireDirectoryMonitor(const std::string & name) { std::lock_guard lock(cluster_nodes_mutex); - cluster_nodes_data[name].requireDirectoryMonitor(name, *this); + cluster_nodes_data[name].requireDirectoryMonitor(name, *this, monitors_blocker); } ConnectionPoolPtr StorageDistributed::requireConnectionPool(const std::string & name) @@ -454,11 +458,17 @@ void StorageDistributed::ClusterNodeData::requireConnectionPool(const std::strin conneciton_pool = StorageDistributedDirectoryMonitor::createPool(name, storage); } -void StorageDistributed::ClusterNodeData::requireDirectoryMonitor(const std::string & name, StorageDistributed & storage) +void StorageDistributed::ClusterNodeData::requireDirectoryMonitor( + const std::string & name, StorageDistributed & storage, ActionBlocker & monitor_blocker) { requireConnectionPool(name, storage); if (!directory_monitor) - directory_monitor = std::make_unique<StorageDistributedDirectoryMonitor>(storage, name, conneciton_pool); + directory_monitor = std::make_unique<StorageDistributedDirectoryMonitor>(storage, name, conneciton_pool, monitor_blocker); +} + +void StorageDistributed::ClusterNodeData::flushAllData() +{ + directory_monitor->flushAllData(); } void StorageDistributed::ClusterNodeData::shutdownAndDropAllData() @@ -499,6 +509,22 @@ ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const Select
return cluster->getClusterWithMultipleShards({shards.begin(), shards.end()}); } +ActionLock StorageDistributed::getActionLock(StorageActionBlockType type) +{ + if (type == ActionLocks::DistributedSend) + return monitors_blocker.cancel(); + return {}; +} + +void StorageDistributed::flushClusterNodesAllData() +{ + std::lock_guard lock(cluster_nodes_mutex); + + /// TODO: Maybe it should be executed in parallel + for (auto it = cluster_nodes_data.begin(); it != cluster_nodes_data.end(); ++it) + it->second.flushAllData(); +} + void registerStorageDistributed(StorageFactory & factory) { diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h index 404a9a7265e..fee3ba78d8d 100644 --- a/dbms/src/Storages/StorageDistributed.h +++ b/dbms/src/Storages/StorageDistributed.h @@ -11,6 +11,7 @@ #include <...> #include <...> #include <...> +#include <Common/ActionBlocker.h> namespace DB @@ -105,8 +106,11 @@ public: /// ensure connection pool creation and return it ConnectionPoolPtr requireConnectionPool(const std::string & name); + void flushClusterNodesAllData(); + ClusterPtr getCluster() const; + ActionLock getActionLock(StorageActionBlockType type) override; String table_name; String remote_database; @@ -135,7 +139,9 @@ public: /// Creates connection_pool if not exists. void requireConnectionPool(const std::string & name, const StorageDistributed & storage); /// Creates directory_monitor if not exists. - void requireDirectoryMonitor(const std::string & name, StorageDistributed & storage); + void requireDirectoryMonitor(const std::string & name, StorageDistributed & storage, ActionBlocker & monitor_blocker); + + void flushAllData(); void shutdownAndDropAllData(); }; @@ -145,6 +151,8 @@ public: /// Used for global monotonic ordering of files to send. SimpleIncrement file_names_increment; + ActionBlocker monitors_blocker; + protected: StorageDistributed( const String & database_name, diff --git a/dbms/src/Storages/StorageMaterializedView.cpp b/dbms/src/Storages/StorageMaterializedView.cpp index 0779132c37e..c843312e3d7 100644 --- a/dbms/src/Storages/StorageMaterializedView.cpp +++ b/dbms/src/Storages/StorageMaterializedView.cpp @@ -370,6 +370,11 @@ void StorageMaterializedView::checkPartitionCanBeDropped(const ASTPtr & partitio target_table->checkPartitionCanBeDropped(partition); } +ActionLock StorageMaterializedView::getActionLock(StorageActionBlockType type) +{ + return has_inner_table ? getTargetTable()->getActionLock(type) : ActionLock{}; +} + void registerStorageMaterializedView(StorageFactory & factory) { factory.registerStorage("MaterializedView", [](const StorageFactory::Arguments & args) diff --git a/dbms/src/Storages/StorageMaterializedView.h b/dbms/src/Storages/StorageMaterializedView.h index 8214875528d..5b14b90d77b 100644 --- a/dbms/src/Storages/StorageMaterializedView.h +++ b/dbms/src/Storages/StorageMaterializedView.h @@ -51,6 +51,8 @@ public: StoragePtr getTargetTable() const; StoragePtr tryGetTargetTable() const; + ActionLock getActionLock(StorageActionBlockType type) override; + BlockInputStreams read( const Names & column_names, const SelectQueryInfo & query_info, diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index 3edf94870f4..f2ba47ba4ce 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -3986,8 +3986,6 @@ void StorageReplicatedMergeTree::sendRequestToLeaderReplica(const ASTPtr & query else throw Exception("Can't proxy this query. 
Unsupported query type", ErrorCodes::NOT_IMPLEMENTED); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithoutFailover(global_context.getSettingsRef()); - const auto & query_settings = query_context.getSettingsRef(); const auto & query_client_info = query_context.getClientInfo(); String user = query_client_info.current_user; @@ -4003,7 +4001,7 @@ void StorageReplicatedMergeTree::sendRequestToLeaderReplica(const ASTPtr & query leader_address.host, leader_address.queries_port, leader_address.database, - user, password, timeouts, "Follower replica"); + user, password, "Follower replica"); std::stringstream new_query_ss; formatAST(*new_query, new_query_ss, false, true); diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 6643ed6dc91..b22753313f6 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -14,7 +14,7 @@ Don't use Docker from your system repository. * [pip](https://pypi.python.org/pypi/pip). To install: `sudo apt-get install python-pip` * [py.test](https://docs.pytest.org/) testing framework. To install: `sudo -H pip install pytest` -* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python` +* [docker-compose](https://docs.docker.com/compose/) and additional python libraries. To install: `sudo -H pip install docker-compose docker dicttoxml kazoo PyMySQL psycopg2 pymongo tzlocal kafka-python protobuf` (highly not recommended) If you really want to use OS packages on modern debian/ubuntu instead of "pip": `sudo apt install -y docker docker-compose python-pytest python-dicttoxml python-docker python-pymysql python-pymongo python-tzlocal python-kazoo python-psycopg2 python-kafka` diff --git a/dbms/tests/integration/test_distributed_respect_user_timeouts/__init__.py b/dbms/tests/integration/test_distributed_respect_user_timeouts/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_distributed_respect_user_timeouts/configs/remote_servers.xml b/dbms/tests/integration/test_distributed_respect_user_timeouts/configs/remote_servers.xml new file mode 100644 index 00000000000..ebce4697529 --- /dev/null +++ b/dbms/tests/integration/test_distributed_respect_user_timeouts/configs/remote_servers.xml @@ -0,0 +1,18 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + diff --git a/dbms/tests/integration/test_distributed_respect_user_timeouts/configs/set_distributed_defaults.xml b/dbms/tests/integration/test_distributed_respect_user_timeouts/configs/set_distributed_defaults.xml new file mode 100644 index 00000000000..194eb1ebb87 --- /dev/null +++ b/dbms/tests/integration/test_distributed_respect_user_timeouts/configs/set_distributed_defaults.xml @@ -0,0 +1,35 @@ + + + + 3 + 1000 + 1 + + + 5 + 3000 + 1 + + + + + + + + ::/0 + + default + default + + + + + ::/0 + + delays + default + + + + + diff --git a/dbms/tests/integration/test_distributed_respect_user_timeouts/test.py b/dbms/tests/integration/test_distributed_respect_user_timeouts/test.py new file mode 100644 index 00000000000..bbab53edeba --- /dev/null +++ b/dbms/tests/integration/test_distributed_respect_user_timeouts/test.py @@ -0,0 +1,141 @@ +import itertools +import timeit + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager +from helpers.test_tools import TSV + + +cluster = ClickHouseCluster(__file__) + +NODES = 
{'node' + str(i): cluster.add_instance( + 'node' + str(i), + main_configs=['configs/remote_servers.xml'], + user_configs=['configs/set_distributed_defaults.xml'], +) for i in (1, 2)} + +CREATE_TABLES_SQL = ''' +CREATE DATABASE test; + +CREATE TABLE base_table( + node String +) +ENGINE = MergeTree +PARTITION BY node +ORDER BY node; + +CREATE TABLE distributed_table +ENGINE = Distributed(test_cluster, default, base_table) AS base_table; +''' + +INSERT_SQL_TEMPLATE = "INSERT INTO base_table VALUES ('{node_id}')" + +SELECTS_SQL = { + 'distributed': 'SELECT node FROM distributed_table ORDER BY node', + 'remote': ("SELECT node FROM remote('node1,node2', default.base_table) " + "ORDER BY node"), +} + +EXCEPTION_NETWORK = 'e.displayText() = DB::NetException: ' +EXCEPTION_TIMEOUT = 'Timeout exceeded while reading from socket (' +EXCEPTION_CONNECT = 'Timeout: connect timed out: ' + +TIMEOUT_MEASUREMENT_EPS = 0.01 + +EXPECTED_BEHAVIOR = { + 'default': { + 'times': 3, + 'timeout': 1, + }, + 'ready_to_wait': { + 'times': 5, + 'timeout': 3, + }, +} + + +def _check_exception(exception, expected_tries=3): + lines = exception.split('\n') + + assert len(lines) > 4, "Unexpected exception (expected: timeout info)" + + assert lines[0].startswith('Received exception from server (version') + + assert lines[1].startswith('Code: 279') + assert lines[1].endswith('All connection tries failed. Log: ') + + assert lines[2] == '', "Unexpected exception text (expected: empty line)" + + for i, line in enumerate(lines[3:3 + expected_tries]): + expected_lines = ( + 'Code: 209, ' + EXCEPTION_NETWORK + EXCEPTION_TIMEOUT, + 'Code: 209, ' + EXCEPTION_NETWORK + EXCEPTION_CONNECT, + ) + + assert any(line.startswith(expected) for expected in expected_lines), \ + 'Unexpected exception at one of the connection attempts' + + assert lines[3 + expected_tries] == '', 'Wrong number of connect attempts' + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node_id, node in NODES.items(): + node.query(CREATE_TABLES_SQL) + node.query(INSERT_SQL_TEMPLATE.format(node_id=node_id)) + + yield cluster + + finally: + cluster.shutdown() + + +def _check_timeout_and_exception(node, user, query_base): + repeats = EXPECTED_BEHAVIOR[user]['times'] + expected_timeout = EXPECTED_BEHAVIOR[user]['timeout'] * repeats + + start = timeit.default_timer() + exception = node.query_and_get_error(SELECTS_SQL[query_base], user=user) + + # And it should timeout no faster than: + measured_timeout = timeit.default_timer() - start + + assert measured_timeout >= expected_timeout - TIMEOUT_MEASUREMENT_EPS + + # And exception should reflect connection attempts: + _check_exception(exception, repeats) + + +@pytest.mark.parametrize( + ('first_user', 'node_name', 'query_base'), + tuple(itertools.product(EXPECTED_BEHAVIOR, NODES, SELECTS_SQL)), +) +def test_reconnect(started_cluster, node_name, first_user, query_base): + node = NODES[node_name] + + # Everything is up, select should work: + assert TSV(node.query(SELECTS_SQL[query_base], + user=first_user)) == TSV('node1\nnode2') + + with PartitionManager() as pm: + # Break the connection. 
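+        # PartitionManager drops packets between the two instances, so every
+        # connection attempt below has to run into the connect/receive
+        # timeouts configured per user in set_distributed_defaults.xml
+        # instead of failing fast with a refused connection.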
+ pm.partition_instances(*NODES.values()) + + # Now it shouldn't: + _check_timeout_and_exception(node, first_user, query_base) + + # Other user should have different timeout and exception + _check_timeout_and_exception( + node, + 'default' if first_user != 'default' else 'ready_to_wait', + query_base, + ) + + # select should work again: + assert TSV(node.query(SELECTS_SQL[query_base], + user=first_user)) == TSV('node1\nnode2') diff --git a/dbms/tests/integration/test_distributed_system_query/__init__.py b/dbms/tests/integration/test_distributed_system_query/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_distributed_system_query/configs/remote_servers.xml b/dbms/tests/integration/test_distributed_system_query/configs/remote_servers.xml new file mode 100644 index 00000000000..ebce4697529 --- /dev/null +++ b/dbms/tests/integration/test_distributed_system_query/configs/remote_servers.xml @@ -0,0 +1,18 @@ +<yandex> + <remote_servers> + <test_cluster> + <shard> + <replica> + <host>node1</host> + <port>9000</port> + </replica> + </shard> + <shard> + <replica> + <host>node2</host> + <port>9000</port> + </replica> + </shard> + </test_cluster> + </remote_servers> +</yandex> diff --git a/dbms/tests/integration/test_distributed_system_query/test.py b/dbms/tests/integration/test_distributed_system_query/test.py new file mode 100644 index 00000000000..c6e28c44034 --- /dev/null +++ b/dbms/tests/integration/test_distributed_system_query/test.py @@ -0,0 +1,41 @@ +from contextlib import contextmanager + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml']) +node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml']) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + for node in (node1, node2): + node.query('''CREATE TABLE local_table(id UInt32, val String) ENGINE = MergeTree ORDER BY id;''') + + node1.query('''CREATE TABLE distributed_table(id UInt32, val String) ENGINE = Distributed(test_cluster, default, local_table, id);''') + + yield cluster + + finally: + cluster.shutdown() + + +def test_start_and_stop_replica_send(started_cluster): + node1.query("SYSTEM STOP DISTRIBUTED SENDS distributed_table;") + + node1.query("INSERT INTO distributed_table VALUES (0, 'node1')") + node1.query("INSERT INTO distributed_table VALUES (1, 'node2')") + + # While distributed sends are stopped, writes go only to the local node + assert node1.query("SELECT COUNT() FROM distributed_table").rstrip() == '1' + + node1.query("SYSTEM START DISTRIBUTED SENDS distributed_table;") + node1.query("SYSTEM FLUSH DISTRIBUTED distributed_table;") + assert node1.query("SELECT COUNT() FROM distributed_table").rstrip() == '2' + diff --git a/dbms/tests/integration/test_mysql_protocol/test.py b/dbms/tests/integration/test_mysql_protocol/test.py index 3118f2fbf54..04e6ac1ee8b 100644 --- a/dbms/tests/integration/test_mysql_protocol/test.py +++ b/dbms/tests/integration/test_mysql_protocol/test.py @@ -51,7 +51,7 @@ def test_mysql_client(mysql_client, server_address): -e "SELECT 'тест' as b;" '''.format(host=server_address, port=server_port), demux=True) - assert stdout == 'a\n1\nb\nтест\n' + assert stdout == '\n'.join(['a', '1', 'b', 'тест', '']) code, (stdout, stderr) = mysql_client.exec_run(''' mysql --protocol tcp -h {host} -P {port} default -u default --password=abc -e "select 1 as a;" @@ -75,14 +75,17 @@ def test_mysql_client(mysql_client, server_address): mysql --protocol tcp -h {host} -P {port} default -u default --password=123 -e "CREATE DATABASE x;" -e "USE x;" - -e "CREATE TABLE table1 (a UInt32) ENGINE = 
Memory;" + -e "CREATE TABLE table1 (column UInt32) ENGINE = Memory;" -e "INSERT INTO table1 VALUES (0), (1), (5);" -e "INSERT INTO table1 VALUES (0), (1), (5);" - -e "SELECT * FROM table1 ORDER BY a;" + -e "SELECT * FROM table1 ORDER BY column;" -e "DROP DATABASE x;" + -e "CREATE TEMPORARY TABLE tmp (tmp_column UInt32);" + -e "INSERT INTO tmp VALUES (0), (1);" + -e "SELECT * FROM tmp ORDER BY tmp_column;" '''.format(host=server_address, port=server_port), demux=True) - assert stdout == 'a\n0\n0\n1\n1\n5\n5\n' + assert stdout == '\n'.join(['column', '0', '0', '1', '1', '5', '5', 'tmp_column', '0', '1', '']) def test_python_client(server_address): diff --git a/dbms/tests/performance/merge_tree_many_partitions.xml b/dbms/tests/performance/merge_tree_many_partitions.xml new file mode 100644 index 00000000000..1b21bbcf780 --- /dev/null +++ b/dbms/tests/performance/merge_tree_many_partitions.xml @@ -0,0 +1,30 @@ + + merge_tree_many_partitions + loop + + CREATE TABLE bad_partitions (x UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x + INSERT INTO bad_partitions SELECT * FROM numbers(10000) + + + + 5 + 10000 + + + 100 + 60000 + + + + + + + + + 0 + + + SELECT count() FROM bad_partitions + + DROP TABLE IF EXISTS bad_partitions + diff --git a/dbms/tests/performance/merge_tree_many_partitions_2.xml b/dbms/tests/performance/merge_tree_many_partitions_2.xml new file mode 100644 index 00000000000..a38a2cc0139 --- /dev/null +++ b/dbms/tests/performance/merge_tree_many_partitions_2.xml @@ -0,0 +1,30 @@ + + merge_tree_many_partitions_2 + loop + + CREATE TABLE bad_partitions (a UInt64, b UInt64, c UInt64, d UInt64, e UInt64, f UInt64, g UInt64, h UInt64, i UInt64, j UInt64, k UInt64, l UInt64, m UInt64, n UInt64, o UInt64, p UInt64, q UInt64, r UInt64, s UInt64, t UInt64, u UInt64, v UInt64, w UInt64, x UInt64, y UInt64, z UInt64) ENGINE = MergeTree PARTITION BY x ORDER BY x + INSERT INTO bad_partitions (x) SELECT * FROM numbers(10000) + + + + 5 + 10000 + + + 100 + 60000 + + + + + + + + + 0 + + + SELECT sum(ignore(*)) FROM bad_partitions + + DROP TABLE IF EXISTS bad_partitions + diff --git a/dbms/tests/queries/0_stateless/00717_merge_and_distributed.sql b/dbms/tests/queries/0_stateless/00717_merge_and_distributed.sql index 4b5090d8e5e..761e2d67877 100644 --- a/dbms/tests/queries/0_stateless/00717_merge_and_distributed.sql +++ b/dbms/tests/queries/0_stateless/00717_merge_and_distributed.sql @@ -18,9 +18,9 @@ SELECT * FROM merge(currentDatabase(), 'test_local_1'); SELECT *, _table FROM merge(currentDatabase(), 'test_local_1') ORDER BY _table; SELECT sum(value), _table FROM merge(currentDatabase(), 'test_local_1') GROUP BY _table ORDER BY _table; SELECT * FROM merge(currentDatabase(), 'test_local_1') WHERE _table = 'test_local_1'; -SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table = 'test_local_1'; -- { serverError 8 } +SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table = 'test_local_1'; -- { serverError 16 } SELECT * FROM merge(currentDatabase(), 'test_local_1') WHERE _table in ('test_local_1', 'test_local_2'); -SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table in ('test_local_1', 'test_local_2'); -- { serverError 8 } +SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table in ('test_local_1', 'test_local_2'); -- { serverError 16 } SELECT '--------------Single Distributed------------'; SELECT * FROM merge(currentDatabase(), 'test_distributed_1'); @@ -36,9 +36,9 @@ SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') 
ORDER BY _ta SELECT *, _table FROM merge(currentDatabase(), 'test_local_1|test_local_2') ORDER BY _table; SELECT sum(value), _table FROM merge(currentDatabase(), 'test_local_1|test_local_2') GROUP BY _table ORDER BY _table; SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') WHERE _table = 'test_local_1'; -SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table = 'test_local_1'; -- {serverError 8} +SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table = 'test_local_1'; -- { serverError 16 } SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') WHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -- {serverError 8} +SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -- { serverError 16 } SELECT '--------------Local Merge Distributed------------'; SELECT * FROM merge(currentDatabase(), 'test_local_1|test_distributed_2') ORDER BY _table; @@ -80,7 +80,7 @@ SELECT '--------------Implicit type conversion------------'; SELECT * FROM merge(currentDatabase(), 'test_s64_distributed|test_u64_distributed') ORDER BY value; SELECT * FROM merge(currentDatabase(), 'test_s64_distributed|test_u64_distributed') WHERE date = '2018-08-01' ORDER BY value; SELECT * FROM merge(currentDatabase(), 'test_s64_distributed|test_u64_distributed') WHERE _table = 'test_u64_distributed' ORDER BY value; -SELECT * FROM merge(currentDatabase(), 'test_s64_distributed|test_u64_distributed') WHERE value = 1; -- { serverError 171} +SELECT * FROM merge(currentDatabase(), 'test_s64_distributed|test_u64_distributed') WHERE value = 1; -- { serverError 171 } DROP TABLE IF EXISTS test_u64_local; DROP TABLE IF EXISTS test_s64_local; diff --git a/dbms/tests/queries/0_stateless/00870_t64_codec.reference b/dbms/tests/queries/0_stateless/00870_t64_codec.reference new file mode 100644 index 00000000000..d50f908f921 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00870_t64_codec.reference @@ -0,0 +1,7 @@ +0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 +1 1 1 1 1 1 1 1 +42 42 42 42 42 42 42 42 +42 42 42 42 42 42 42 42 +42 42 42 42 42 42 42 42 +42 42 42 42 42 42 42 42 diff --git a/dbms/tests/queries/0_stateless/00870_t64_codec.sql b/dbms/tests/queries/0_stateless/00870_t64_codec.sql new file mode 100644 index 00000000000..654f7fa7818 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00870_t64_codec.sql @@ -0,0 +1,101 @@ +DROP TABLE IF EXISTS test.t64; + +CREATE TABLE test.t64 +( + u8 UInt8, + t_u8 UInt8 Codec(T64, ZSTD), + u16 UInt16, + t_u16 UInt16 Codec(T64, ZSTD), + u32 UInt32, + t_u32 UInt32 Codec(T64, ZSTD), + u64 UInt64, + t_u64 UInt64 Codec(T64, ZSTD) +) ENGINE MergeTree() ORDER BY tuple(); + +INSERT INTO test.t64 SELECT number AS x, x, x, x, x, x, x, x FROM numbers(1); +INSERT INTO test.t64 SELECT number AS x, x, x, x, x, x, x, x FROM numbers(2); +INSERT INTO test.t64 SELECT 42 AS x, x, x, x, x, x, x, x FROM numbers(4); + +SELECT * FROM test.t64 ORDER BY u64; + +INSERT INTO test.t64 SELECT number AS x, x, x, x, x, x, x, x FROM numbers(intExp2(8)); +INSERT INTO test.t64 SELECT number AS x, x, x, x, x, x, x, x FROM numbers(intExp2(9)); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 
SELECT (intExp2(16) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(16) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (intExp2(16) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(64); +INSERT INTO test.t64 SELECT (intExp2(16) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (intExp2(16) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 SELECT (intExp2(24) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(24) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (intExp2(24) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(128); +INSERT INTO test.t64 SELECT (intExp2(24) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(129); +INSERT INTO test.t64 SELECT (intExp2(24) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(129); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 SELECT (intExp2(32) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(32) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(32) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(256); +INSERT INTO test.t64 SELECT (intExp2(32) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(257); +INSERT INTO test.t64 SELECT (intExp2(32) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(257); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 SELECT (intExp2(40) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(40) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(40) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(512); +INSERT INTO test.t64 SELECT (intExp2(40) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(513); +INSERT INTO test.t64 SELECT (intExp2(40) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(513); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 SELECT (intExp2(48) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(48) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(48) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(1024); +INSERT INTO test.t64 SELECT (intExp2(48) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(1025); +INSERT INTO test.t64 SELECT (intExp2(48) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(1025); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 SELECT (intExp2(56) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(56) - 10 + number) AS 
x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(56) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(2048); +INSERT INTO test.t64 SELECT (intExp2(56) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(2049); +INSERT INTO test.t64 SELECT (intExp2(56) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(2049); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +INSERT INTO test.t64 SELECT (intExp2(63) + number * intExp2(62)) AS x, x, x, x, x, x, x, x FROM numbers(10); +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +OPTIMIZE TABLE test.t64 FINAL; + +SELECT * FROM test.t64 WHERE u8 != t_u8; +SELECT * FROM test.t64 WHERE u16 != t_u16; +SELECT * FROM test.t64 WHERE u32 != t_u32; +SELECT * FROM test.t64 WHERE u64 != t_u64; + +DROP TABLE test.t64; diff --git a/dbms/tests/queries/0_stateless/00871_t64_codec_signed.reference b/dbms/tests/queries/0_stateless/00871_t64_codec_signed.reference new file mode 100644 index 00000000000..63e6e93e150 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00871_t64_codec_signed.reference @@ -0,0 +1,9 @@ +-1 -1 -1 -1 -1 -1 -1 -1 +-1 -1 -1 -1 -1 -1 -1 -1 +0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 +1 1 1 1 1 1 1 1 +42 42 42 42 42 42 42 42 +42 42 42 42 42 42 42 42 +42 42 42 42 42 42 42 42 +42 42 42 42 42 42 42 42 diff --git a/dbms/tests/queries/0_stateless/00871_t64_codec_signed.sql b/dbms/tests/queries/0_stateless/00871_t64_codec_signed.sql new file mode 100644 index 00000000000..5e0afe71a02 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00871_t64_codec_signed.sql @@ -0,0 +1,128 @@ +DROP TABLE IF EXISTS test.t64; + +CREATE TABLE test.t64 +( + i8 Int8, + t_i8 Int8 Codec(T64, LZ4), + i16 Int16, + t_i16 Int16 Codec(T64, LZ4), + i32 Int32, + t_i32 Int32 Codec(T64, LZ4), + i64 Int64, + t_i64 Int64 Codec(T64, LZ4) +) ENGINE MergeTree() ORDER BY tuple(); + +INSERT INTO test.t64 SELECT toInt32(number)-1 AS x, x, x, x, x, x, x, x FROM numbers(2); +INSERT INTO test.t64 SELECT toInt32(number)-1 AS x, x, x, x, x, x, x, x FROM numbers(3); +INSERT INTO test.t64 SELECT 42 AS x, x, x, x, x, x, x, x FROM numbers(4); + +SELECT * FROM test.t64 ORDER BY i64; + +INSERT INTO test.t64 SELECT (intExp2(8) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +SELECT i8, t_i8 FROM test.t64 WHERE i8 != t_i8; + +INSERT INTO test.t64 SELECT number AS x, x, x, x, x, x, x, x FROM numbers(intExp2(8)); +INSERT INTO test.t64 SELECT number AS x, x, x, x, x, x, x, x FROM numbers(intExp2(9)); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +INSERT INTO test.t64 SELECT (intExp2(16) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(16) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (intExp2(16) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(64); +INSERT INTO test.t64 SELECT (intExp2(16) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (intExp2(16) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(16)) + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (10 - 
toInt64(intExp2(16)) + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(16)) + number) AS x, x, x, x, x, x, x, x FROM numbers(64); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(16)) + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (1 - toInt64(intExp2(16)) + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +INSERT INTO test.t64 SELECT (intExp2(24) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(24) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (intExp2(24) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(128); +INSERT INTO test.t64 SELECT (intExp2(24) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(129); +INSERT INTO test.t64 SELECT (intExp2(24) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(129); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(24)) + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(24)) + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(24)) + number) AS x, x, x, x, x, x, x, x FROM numbers(128); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(24)) + number) AS x, x, x, x, x, x, x, x FROM numbers(129); +INSERT INTO test.t64 SELECT (1 - toInt64(intExp2(24)) + number) AS x, x, x, x, x, x, x, x FROM numbers(129); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +INSERT INTO test.t64 SELECT (intExp2(32) - 2 + number) AS x, x, x, x, x, x, x, x FROM numbers(2); +INSERT INTO test.t64 SELECT (intExp2(32) - 2 + number) AS x, x, x, x, x, x, x, x FROM numbers(3); +INSERT INTO test.t64 SELECT (intExp2(32) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(64); +INSERT INTO test.t64 SELECT (intExp2(32) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (intExp2(32) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(32)) + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(32)) + number) AS x, x, x, x, x, x, x, x FROM numbers(11); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(32)) + number) AS x, x, x, x, x, x, x, x FROM numbers(64); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(32)) + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +INSERT INTO test.t64 SELECT (1 - toInt64(intExp2(32)) + number) AS x, x, x, x, x, x, x, x FROM numbers(65); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +INSERT INTO test.t64 SELECT (intExp2(40) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(40) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(40) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(512); +INSERT INTO test.t64 SELECT (intExp2(40) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(513); +INSERT INTO test.t64 SELECT (intExp2(40) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(513); +INSERT 
INTO test.t64 SELECT (10 - toInt64(intExp2(40)) + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(40)) + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(40)) + number) AS x, x, x, x, x, x, x, x FROM numbers(512); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(40)) + number) AS x, x, x, x, x, x, x, x FROM numbers(513); +INSERT INTO test.t64 SELECT (1 - toInt64(intExp2(40)) + number) AS x, x, x, x, x, x, x, x FROM numbers(513); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +INSERT INTO test.t64 SELECT (intExp2(48) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(48) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(48) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(1024); +INSERT INTO test.t64 SELECT (intExp2(48) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(1025); +INSERT INTO test.t64 SELECT (intExp2(48) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(1025); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(48)) + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(48)) + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(48)) + number) AS x, x, x, x, x, x, x, x FROM numbers(1024); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(48)) + number) AS x, x, x, x, x, x, x, x FROM numbers(1025); +INSERT INTO test.t64 SELECT (1 - toInt64(intExp2(48)) + number) AS x, x, x, x, x, x, x, x FROM numbers(1025); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +INSERT INTO test.t64 SELECT (intExp2(56) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (intExp2(56) - 10 + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (intExp2(56) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(2048); +INSERT INTO test.t64 SELECT (intExp2(56) - 64 + number) AS x, x, x, x, x, x, x, x FROM numbers(2049); +INSERT INTO test.t64 SELECT (intExp2(56) - 1 + number) AS x, x, x, x, x, x, x, x FROM numbers(2049); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(56)) + number) AS x, x, x, x, x, x, x, x FROM numbers(10); +INSERT INTO test.t64 SELECT (10 - toInt64(intExp2(56)) + number) AS x, x, x, x, x, x, x, x FROM numbers(20); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(56)) + number) AS x, x, x, x, x, x, x, x FROM numbers(2048); +INSERT INTO test.t64 SELECT (64 - toInt64(intExp2(56)) + number) AS x, x, x, x, x, x, x, x FROM numbers(2049); +INSERT INTO test.t64 SELECT (1 - toInt64(intExp2(56)) + number) AS x, x, x, x, x, x, x, x FROM numbers(2049); +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +OPTIMIZE TABLE test.t64 FINAL; + +SELECT * FROM test.t64 WHERE i8 != t_i8; +SELECT * FROM test.t64 WHERE i16 != t_i16; +SELECT * FROM test.t64 WHERE i32 != t_i32; +SELECT * FROM test.t64 WHERE i64 != t_i64; + +DROP TABLE test.t64; diff --git a/docs/en/getting_started/example_datasets/ontime.md b/docs/en/getting_started/example_datasets/ontime.md 
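The new 00870/00871 tests above exercise the T64 codec, which groups integers into blocks of 64, transposes them as a 64x64 bit matrix, and keeps only the bit planes that are actually used, so small values leave long zero runs for LZ4/ZSTD to compress. A naive sketch of the transpose at the heart of the codec; the production implementation uses a much faster algorithm and also truncates the unused planes:

```cpp
#include <array>
#include <cstdint>
#include <iostream>

// Naive 64x64 bit-matrix transpose: row i of `in` holds value i, row j of
// `out` holds bit-plane j. After transposing, the high bit-planes of small
// values become all-zero words, which a generic codec compresses very well.
std::array<uint64_t, 64> transpose(const std::array<uint64_t, 64> & in)
{
    std::array<uint64_t, 64> out{};
    for (int i = 0; i < 64; ++i)
        for (int j = 0; j < 64; ++j)
            if ((in[i] >> j) & 1)
                out[j] |= uint64_t{1} << i;
    return out;
}

int main()
{
    std::array<uint64_t, 64> values{};
    for (int i = 0; i < 64; ++i)
        values[i] = 42;   // small values -> only a few non-zero bit-planes

    auto planes = transpose(values);

    int non_zero = 0;
    for (uint64_t plane : planes)
        non_zero += (plane != 0);
    std::cout << non_zero << " non-zero bit-planes out of 64\n";   // prints 3
}
```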
index c19785bc614..d076f5b6469 100644 --- a/docs/en/getting_started/example_datasets/ontime.md +++ b/docs/en/getting_started/example_datasets/ontime.md @@ -163,31 +163,54 @@ clickhouse-client --query "select count(*) from datasets.ontime" Q0. ``` sql -select avg(c1) from (select Year, Month, count(*) as c1 from ontime group by Year, Month); +SELECT avg(c1) +FROM +( + SELECT Year, Month, count(*) AS c1 + FROM ontime + GROUP BY Year, Month +); ``` Q1. The number of flights per day from the year 2000 to 2008 ``` sql -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC; +SELECT DayOfWeek, count(*) AS c +FROM ontime +WHERE Year>=2000 AND Year<=2008 +GROUP BY DayOfWeek +ORDER BY c DESC; ``` Q2. The number of flights delayed by more than 10 minutes, grouped by the day of the week, for 2000-2008 ``` sql -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC +SELECT DayOfWeek, count(*) AS c +FROM ontime +WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 +GROUP BY DayOfWeek +ORDER BY c DESC; ``` Q3. The number of delays by airport for 2000-2008 ``` sql -SELECT Origin, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY Origin ORDER BY c DESC LIMIT 10 +SELECT Origin, count(*) AS c +FROM ontime +WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 +GROUP BY Origin +ORDER BY c DESC +LIMIT 10; ``` Q4. The number of delays by carrier for 2007 ``` sql -SELECT Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year = 2007 GROUP BY Carrier ORDER BY count(*) DESC +SELECT Carrier, count(*) +FROM ontime +WHERE DepDelay>10 AND Year=2007 +GROUP BY Carrier +ORDER BY count(*) DESC; ``` Q5. The percentage of delays by carrier for 2007 @@ -219,7 +242,11 @@ ORDER BY c3 DESC; Better version of the same query: ``` sql -SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier +SELECT Carrier, avg(DepDelay>10)*100 AS c3 +FROM ontime +WHERE Year=2007 +GROUP BY Carrier +ORDER BY Carrier ``` Q6. The previous request for a broader range of years, 2000-2008 @@ -233,7 +260,7 @@ FROM count(*) AS c FROM ontime WHERE DepDelay>10 - AND Year >= 2000 AND Year <= 2008 + AND Year>=2000 AND Year<=2008 GROUP BY Carrier ) ANY INNER JOIN @@ -242,7 +269,7 @@ ANY INNER JOIN Carrier, count(*) AS c2 FROM ontime - WHERE Year >= 2000 AND Year <= 2008 + WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier ) USING Carrier ORDER BY c3 DESC; @@ -251,7 +278,11 @@ ORDER BY c3 DESC; Better version of the same query: ``` sql -SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY Carrier ORDER BY Carrier +SELECT Carrier, avg(DepDelay>10)*100 AS c3 +FROM ontime +WHERE Year>=2000 AND Year<=2008 +GROUP BY Carrier +ORDER BY Carrier; ``` Q7. Percentage of flights delayed for more than 10 minutes, by year @@ -275,41 +306,51 @@ ANY INNER JOIN from ontime GROUP BY Year ) USING (Year) -ORDER BY Year +ORDER BY Year; ``` Better version of the same query: ``` sql -SELECT Year, avg(DepDelay > 10) FROM ontime GROUP BY Year ORDER BY Year +SELECT Year, avg(DepDelay>10) +FROM ontime +GROUP BY Year +ORDER BY Year; ``` Q8. 
The most popular destinations by the number of directly connected cities for various year ranges ``` sql -SELECT DestCityName, uniqExact(OriginCityName) AS u FROM ontime WHERE Year >= 2000 and Year <= 2010 GROUP BY DestCityName ORDER BY u DESC LIMIT 10; +SELECT DestCityName, uniqExact(OriginCityName) AS u +FROM ontime +WHERE Year>=2000 AND Year<=2010 +GROUP BY DestCityName +ORDER BY u DESC +LIMIT 10; ``` Q9. ``` sql -select Year, count(*) as c1 from ontime group by Year; +SELECT Year, count(*) AS c1 +FROM ontime +GROUP BY Year; ``` Q10. ``` sql -select - min(Year), max(Year), Carrier, count(*) as cnt, - sum(ArrDelayMinutes>30) as flights_delayed, - round(sum(ArrDelayMinutes>30)/count(*),2) as rate +SELECT + min(Year), max(Year), Carrier, count(*) AS cnt, + sum(ArrDelayMinutes>30) AS flights_delayed, + round(sum(ArrDelayMinutes>30)/count(*),2) AS rate FROM ontime WHERE - DayOfWeek not in (6,7) and OriginState not in ('AK', 'HI', 'PR', 'VI') - and DestState not in ('AK', 'HI', 'PR', 'VI') - and FlightDate < '2010-01-01' + DayOfWeek NOT IN (6,7) AND OriginState NOT IN ('AK', 'HI', 'PR', 'VI') + AND DestState NOT IN ('AK', 'HI', 'PR', 'VI') + AND FlightDate < '2010-01-01' GROUP by Carrier -HAVING cnt > 100000 and max(Year) > 1990 +HAVING cnt>100000 AND max(Year)>1990 ORDER by rate DESC LIMIT 1000; ``` @@ -317,15 +358,39 @@ LIMIT 1000; Bonus: ``` sql -SELECT avg(cnt) FROM (SELECT Year,Month,count(*) AS cnt FROM ontime WHERE DepDel15=1 GROUP BY Year,Month) +SELECT avg(cnt) +FROM +( + SELECT Year,Month,count(*) AS cnt + FROM ontime + WHERE DepDel15=1 + GROUP BY Year,Month +); -select avg(c1) from (select Year,Month,count(*) as c1 from ontime group by Year,Month) +SELECT avg(c1) FROM +( + SELECT Year,Month,count(*) AS c1 + FROM ontime + GROUP BY Year,Month +); -SELECT DestCityName, uniqExact(OriginCityName) AS u FROM ontime GROUP BY DestCityName ORDER BY u DESC LIMIT 10; +SELECT DestCityName, uniqExact(OriginCityName) AS u +FROM ontime +GROUP BY DestCityName +ORDER BY u DESC +LIMIT 10; -SELECT OriginCityName, DestCityName, count() AS c FROM ontime GROUP BY OriginCityName, DestCityName ORDER BY c DESC LIMIT 10; +SELECT OriginCityName, DestCityName, count() AS c +FROM ontime +GROUP BY OriginCityName, DestCityName +ORDER BY c DESC +LIMIT 10; -SELECT OriginCityName, count() AS c FROM ontime GROUP BY OriginCityName ORDER BY c DESC LIMIT 10; +SELECT OriginCityName, count() AS c +FROM ontime +GROUP BY OriginCityName +ORDER BY c DESC +LIMIT 10; ``` This performance test was created by Vadim Tkachenko. See: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 7b834718feb..f9b37be8c23 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -753,8 +753,8 @@ Default value: 0. Enables or disables: -1. Rewriting of queries with multiple [JOIN clauses](../../query_language/select.md#select-join) from the syntax with commas to the `JOIN ON/USING` syntax. If the setting value is 0, ClickHouse doesn't process queries with the syntax with commas, and throws an exception. -2. Converting of `CROSS JOIN` into `INNER JOIN` if conditions of join allow it. +1. Rewriting queries for join from the syntax with commas to the `JOIN ON/USING` syntax. If the setting value is 0, ClickHouse doesn't process queries with syntax that uses commas, and throws an exception. +2. Converting `CROSS JOIN` to `INNER JOIN` if `WHERE` conditions allow it. Possible values: @@ -763,6 +763,11 @@ Possible values: Default value: 1. 
+**See Also** + +- [Multiple JOIN](../../query_language/select.md#select-join) + + ## count_distinct_implementation {#settings-count_distinct_implementation} Specifies which of the `uniq*` functions should be used for performing the [COUNT(DISTINCT ...)](../../query_language/agg_functions/reference.md#agg_function-count) construction. diff --git a/docs/en/operations/table_engines/odbc.md b/docs/en/operations/table_engines/odbc.md index 44460cbdace..a3e7cda4a39 100644 --- a/docs/en/operations/table_engines/odbc.md +++ b/docs/en/operations/table_engines/odbc.md @@ -2,7 +2,7 @@ Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). -To implement ODBC connection safely, ClickHouse uses the separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from the `clickhouse-server` program, the problems in the driver can crash the ClickHouse server. ClickHouse starts the `clickhouse-odbc-bridge` program automatically when it is required. The ODBC bridge program is installed by the same package as the `clickhouse-server`. +To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`. This engine supports the [Nullable](../../data_types/nullable.md) data type. @@ -18,12 +18,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ENGINE = ODBC(connection_settings, external_database, external_table) ``` -See the detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query. +See a detailed description of the [CREATE TABLE](../../query_language/create.md#create-table-query) query. -The table structure can be not the same as the source table structure: +The table structure can differ from the source table structure: -- Names of columns should be the same as in the source table, but you can use just some of these columns in any order. -- Types of columns may differ from the types in the source table. ClickHouse tries to [cast](../../query_language/functions/type_conversion_functions.md#type_conversion_function-cast) values into the ClickHouse data types. +- Column names should be the same as in the source table, but you can use just some of these columns and in any order. +- Column types may differ from those in the source table. ClickHouse tries to [cast](../../query_language/functions/type_conversion_functions.md#type_conversion_function-cast) values to the ClickHouse data types. **Engine Parameters** @@ -33,13 +33,13 @@ The table structure can be not the same as the source table structure: ## Usage Example -**Getting data from the local MySQL installation via ODBC** +**Retrieving data from the local MySQL installation via ODBC** -This example is for linux Ubuntu 18.04 and MySQL server 5.7. +This example was checked on Ubuntu Linux 18.04 with MySQL server 5.7. -Ensure that there are unixODBC and MySQL Connector are installed. +Ensure that unixODBC and MySQL Connector are installed. -By default (if installed from packages) ClickHouse starts on behalf of the user `clickhouse`. Thus, you need to create and configure this user at MySQL server. +By default (if installed from packages), ClickHouse starts as user `clickhouse`. 
Thus, you need to create and configure this user in the MySQL server.

```
sudo mysql
@@ -60,7 +60,7 @@ USERNAME = clickhouse
PASSWORD = clickhouse
```

-You can check the connection by the `isql` utility from the unixODBC installation.
+You can check the connection using the `isql` utility from the unixODBC installation.

```
isql -v mysqlconn
@@ -93,7 +93,7 @@ mysql> select * from test;
1 row in set (0,00 sec)
```

-Table in ClickHouse, getting data from the MySQL table:
+Table in ClickHouse, retrieving data from the MySQL table:

```sql
CREATE TABLE odbc_t
@@ -115,6 +115,6 @@ SELECT * FROM odbc_t
## See Also

- [ODBC external dictionaries](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
-- [ODBC table function](../../query_language/table_functions/odbc.md).
+- [ODBC table function](../../query_language/table_functions/odbc.md)

-[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/jdbc/)
+[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/odbc/)
diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md
index ab98cabbb57..87254fad58c 100644
--- a/docs/en/query_language/agg_functions/reference.md
+++ b/docs/en/query_language/agg_functions/reference.md
@@ -839,12 +839,12 @@ simpleLinearRegression(x, y)

Parameters:

-- `x` — Column with values of dependent variable.
-- `y` — Column with explanatory variable.
+- `x` — Column with dependent variable values.
+- `y` — Column with explanatory variable values.

Returned values:

-Parameters `(a, b)` of the resulting line `y = a*x + b`.
+Constants `(a, b)` of the resulting line `y = a*x + b`.

**Examples**
diff --git a/docs/en/query_language/dicts/external_dicts_dict_structure.md b/docs/en/query_language/dicts/external_dicts_dict_structure.md
index 2b84e0157f8..d8ecf4e2efa 100644
--- a/docs/en/query_language/dicts/external_dicts_dict_structure.md
+++ b/docs/en/query_language/dicts/external_dicts_dict_structure.md
@@ -105,12 +105,14 @@ Configuration example:

Configuration fields:

-- `name` – The column name.
-- `type` – The column type. Sets the method for interpreting data in the source. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the source table, but it can be uploaded as `String`.
-- `null_value` – The default value for a non-existing element. In the example, it is an empty string.
-- `expression` – The attribute can be an expression. The tag is not required.
-- `hierarchical` – Hierarchical support. Mirrored to the parent identifier. By default, ` false`.
-- `injective` – Whether the `id -> attribute` image is injective. If ` true`, then you can optimize the ` GROUP BY` clause. By default, `false`.
-- `is_object_id` – Whether the query is executed for a MongoDB document by `ObjectID`.
+Tag | Description | Required
+----|-------------|---------
+`name` | Column name. | Yes
+`type` | Column type.<br/>Sets the method for interpreting data in the source. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the source table, but it can be uploaded as `String`. | Yes
+`null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. | Yes
+`expression` | [Expression](../syntax.md#syntax-expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it for creating an alias for the remote column.<br/><br/>Default value: no expression. | No
+`hierarchical` | Hierarchical support. Mirrored to the parent identifier.<br/><br/>Default value: `false`. | No
+`injective` | Flag that shows whether the `id -> attribute` image is injective.<br/>If `true`, then you can optimize the `GROUP BY` clause.<br/><br/>Default value: `false`. | No
+`is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No

[Original article](https://clickhouse.yandex/docs/en/query_language/dicts/external_dicts_dict_structure/)
diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md
index 3cfe706ae2c..a32d3cde6f6 100644
--- a/docs/en/query_language/select.md
+++ b/docs/en/query_language/select.md
@@ -449,23 +449,25 @@ See the standard [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) descriptio

#### Multiple JOIN

-Performing queries, ClickHouse rewrites multiple joins into the sequence of two-table joins. If there are four tables for join ClickHouse joins the first and the second, then joins the result with the third table, and at the last step, it joins the fourth one.
+When performing queries, ClickHouse rewrites multi-table joins into a sequence of two-table joins. For example, if four tables are joined, ClickHouse joins the first and the second, then joins the result with the third table, and at the last step joins the fourth one.

-If a query contains `WHERE` clause, ClickHouse tries to push down filters from this clause into the intermediate join. If it cannot apply the filter to each intermediate join, ClickHouse applies the filters after all joins are completed.
+If a query contains the `WHERE` clause, ClickHouse tries to push down filters from this clause through the intermediate joins. If it cannot apply a filter to an intermediate join, ClickHouse applies the filters after all joins are completed.

-We recommend the `JOIN ON` or `JOIN USING` syntax for creating a query. For example:
+We recommend the `JOIN ON` or `JOIN USING` syntax for creating queries. For example:

```
SELECT * FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.a = t3.a
```

-Also, you can use comma separated list of tables for join. Works only with the [allow_experimental_cross_to_join_conversion = 1](../operations/settings/settings.md#settings-allow_experimental_cross_to_join_conversion) setting.
+You can use comma-separated lists of tables in the `FROM` clause. This works only with the [allow_experimental_cross_to_join_conversion = 1](../operations/settings/settings.md#settings-allow_experimental_cross_to_join_conversion) setting. For example:

- For example, `SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a`
+```
+SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a
+```

Don't mix these syntaxes.

-ClickHouse doesn't support the syntax with commas directly, so we don't recommend to use it. The algorithm tries to rewrite the query in terms of `CROSS` and `INNER` `JOIN` clauses and then proceeds the query processing. When rewriting the query, ClickHouse tries to optimize performance and memory consumption. By default, ClickHouse treats comma as an `INNER JOIN` clause and converts it to `CROSS JOIN` when the algorithm cannot guaranty that `INNER JOIN` returns required data.
+ClickHouse doesn't directly support the comma syntax, so we don't recommend using it. The algorithm tries to rewrite the query in terms of `CROSS JOIN` and `INNER JOIN` clauses and then proceeds to query processing. When rewriting the query, ClickHouse tries to optimize performance and memory consumption. By default, ClickHouse treats a comma as an `INNER JOIN` clause and converts `INNER JOIN` to `CROSS JOIN` when the algorithm cannot guarantee that `INNER JOIN` returns the required data.
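To make the filter behavior concrete, here is a sketch with hypothetical tables `t1`, `t2`, `t3` and columns `a`, `b`, `c`; exactly which filters can be applied early depends on the query:

```sql
-- The single-table predicate t1.b > 0 can be applied already while joining
-- t1 with t2. The predicate that mixes columns of t2 and t3 can only be
-- checked after the last join completes.
SELECT *
FROM t1
JOIN t2 ON t1.a = t2.a
JOIN t3 ON t1.a = t3.a
WHERE t1.b > 0 AND t2.c + t3.c < 10;
```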
#### ANY or ALL Strictness
@@ -558,14 +560,15 @@ If the `JOIN` keys are [Nullable](../data_types/nullable.md) fields, the rows wh

#### Syntax Limitations

-For multiple `JOIN` clauses in the single `SELECT` query:
+For multiple `JOIN` clauses in a single `SELECT` query:

- Taking all the columns via `*` is available only if tables are joined, not subqueries.
- The `PREWHERE` clause is not available.

-For `ON`, `WHERE` and `GROUP BY` clauses:
+For `ON`, `WHERE`, and `GROUP BY` clauses:
+
+- Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias.

-- Arbitrary expressions cannot be used in `ON`, `WHERE` and `GROUP BY` clauses, but you can define an expression in `SELECT` clause and then use it via alias in these clauses.

### WHERE Clause
diff --git a/docs/en/query_language/table_functions/odbc.md b/docs/en/query_language/table_functions/odbc.md
index c9bc4da034e..0c3204d4ca5 100644
--- a/docs/en/query_language/table_functions/odbc.md
+++ b/docs/en/query_language/table_functions/odbc.md
@@ -12,7 +12,7 @@ Parameters:

- `external_database` — Name of a database in an external DBMS.
- `external_table` — Name of a table in the `external_database`.

-To implement ODBC connection safely, ClickHouse uses the separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from the `clickhouse-server` program, the problems in the driver can crash the ClickHouse server. ClickHouse starts the `clickhouse-odbc-bridge` program automatically when it is required. The ODBC bridge program is installed by the same package as the `clickhouse-server`.
+To safely implement ODBC connections, ClickHouse uses a separate program, `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`.

The fields with the `NULL` values from the external table are converted into the default values for the base data type. For example, if a remote MySQL table field has the `INT NULL` type it is converted to 0 (the default value for ClickHouse `Int32` data type).

@@ -20,11 +20,11 @@ The fields with the `NULL` values from the external table are converted into the

**Getting data from the local MySQL installation via ODBC**

-This example is for linux Ubuntu 18.04 and MySQL server 5.7.
+This example was checked on Ubuntu Linux 18.04 with MySQL server 5.7.

-Ensure that there are unixODBC and MySQL Connector are installed.
+Ensure that unixODBC and MySQL Connector are installed.

-By default (if installed from packages) ClickHouse starts on behalf of the user `clickhouse`. Thus you need to create and configure this user at MySQL server.
+By default (if installed from packages), ClickHouse starts as user `clickhouse`. Thus, you need to create and configure this user in the MySQL server.

```
sudo mysql
@@ -45,7 +45,7 @@ USERNAME = clickhouse
PASSWORD = clickhouse
```

-You can check the connection by the `isql` utility from the unixODBC installation.
+You can check the connection using the `isql` utility from the unixODBC installation.
``` isql -v mysqlconn @@ -78,7 +78,7 @@ mysql> select * from test; 1 row in set (0,00 sec) ``` -Getting data from the MySQL table: +Retrieving data from the MySQL table in ClickHouse: ```sql SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test') diff --git a/docs/ru/getting_started/example_datasets/ontime.md b/docs/ru/getting_started/example_datasets/ontime.md index 27c8a58dd27..32f222b0630 100644 --- a/docs/ru/getting_started/example_datasets/ontime.md +++ b/docs/ru/getting_started/example_datasets/ontime.md @@ -163,31 +163,54 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.ontime" Q0. ``` sql -select avg(c1) from (select Year, Month, count(*) as c1 from ontime group by Year, Month); +SELECT avg(c1) +FROM +( + SELECT Year, Month, count(*) AS c1 + FROM ontime + GROUP BY Year, Month +); ``` Q1. Количество полетов в день с 2000 по 2008 года ``` sql -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC; +SELECT DayOfWeek, count(*) AS c +FROM ontime +WHERE Year>=2000 AND Year<=2008 +GROUP BY DayOfWeek +ORDER BY c DESC; ``` Q2. Количество полетов, задержанных более чем на 10 минут, с группировкой по дням неделе, за 2000-2008 года ``` sql -SELECT DayOfWeek, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY DayOfWeek ORDER BY c DESC +SELECT DayOfWeek, count(*) AS c +FROM ontime +WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 +GROUP BY DayOfWeek +ORDER BY c DESC; ``` Q3. Количество задержек по аэропортам за 2000-2008 ``` sql -SELECT Origin, count(*) AS c FROM ontime WHERE DepDelay>10 AND Year >= 2000 AND Year <= 2008 GROUP BY Origin ORDER BY c DESC LIMIT 10 +SELECT Origin, count(*) AS c +FROM ontime +WHERE DepDelay>10 AND Year>=2000 AND Year<=2008 +GROUP BY Origin +ORDER BY c DESC +LIMIT 10; ``` Q4. Количество задержек по перевозчикам за 2007 год ``` sql -SELECT Carrier, count(*) FROM ontime WHERE DepDelay>10 AND Year = 2007 GROUP BY Carrier ORDER BY count(*) DESC +SELECT Carrier, count(*) +FROM ontime +WHERE DepDelay>10 AND Year=2007 +GROUP BY Carrier +ORDER BY count(*) DESC; ``` Q5. Процент задержек по перевозчикам за 2007 год @@ -219,7 +242,11 @@ ORDER BY c3 DESC; Более оптимальная версия того же запроса: ``` sql -SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year = 2007 GROUP BY Carrier ORDER BY Carrier +SELECT Carrier, avg(DepDelay>10)*100 AS c3 +FROM ontime +WHERE Year=2007 +GROUP BY Carrier +ORDER BY Carrier ``` Q6. Предыдущий запрос за более широкий диапазон лет, 2000-2008 @@ -233,7 +260,7 @@ FROM count(*) AS c FROM ontime WHERE DepDelay>10 - AND Year >= 2000 AND Year <= 2008 + AND Year>=2000 AND Year<=2008 GROUP BY Carrier ) ANY INNER JOIN @@ -242,7 +269,7 @@ ANY INNER JOIN Carrier, count(*) AS c2 FROM ontime - WHERE Year >= 2000 AND Year <= 2008 + WHERE Year>=2000 AND Year<=2008 GROUP BY Carrier ) USING Carrier ORDER BY c3 DESC; @@ -251,7 +278,11 @@ ORDER BY c3 DESC; Более оптимальная версия того же запроса: ``` sql -SELECT Carrier, avg(DepDelay > 10) * 100 AS c3 FROM ontime WHERE Year >= 2000 AND Year <= 2008 GROUP BY Carrier ORDER BY Carrier +SELECT Carrier, avg(DepDelay>10)*100 AS c3 +FROM ontime +WHERE Year>=2000 AND Year<=2008 +GROUP BY Carrier +ORDER BY Carrier; ``` Q7. 
Процент полетов, задержанных на более 10 минут, в разбивке по годам
@@ -275,41 +306,51 @@ ANY INNER JOIN
from ontime
GROUP BY Year
) USING (Year)
-ORDER BY Year
+ORDER BY Year;
```

Более оптимальная версия того же запроса:

``` sql
-SELECT Year, avg(DepDelay > 10) FROM ontime GROUP BY Year ORDER BY Year
+SELECT Year, avg(DepDelay>10)
+FROM ontime
+GROUP BY Year
+ORDER BY Year;
```

Q8. Самые популярные направления по количеству напрямую соединенных городов для различных диапазонов лет

``` sql
-SELECT DestCityName, uniqExact(OriginCityName) AS u FROM ontime WHERE Year >= 2000 and Year <= 2010 GROUP BY DestCityName ORDER BY u DESC LIMIT 10;
+SELECT DestCityName, uniqExact(OriginCityName) AS u
+FROM ontime
+WHERE Year>=2000 AND Year<=2010
+GROUP BY DestCityName
+ORDER BY u DESC
+LIMIT 10;
```

Q9.

``` sql
-select Year, count(*) as c1 from ontime group by Year;
+SELECT Year, count(*) AS c1
+FROM ontime
+GROUP BY Year;
```

Q10.

``` sql
-select
- min(Year), max(Year), Carrier, count(*) as cnt,
- sum(ArrDelayMinutes>30) as flights_delayed,
- round(sum(ArrDelayMinutes>30)/count(*),2) as rate
+SELECT
+ min(Year), max(Year), Carrier, count(*) AS cnt,
+ sum(ArrDelayMinutes>30) AS flights_delayed,
+ round(sum(ArrDelayMinutes>30)/count(*),2) AS rate
FROM ontime
WHERE
- DayOfWeek not in (6,7) and OriginState not in ('AK', 'HI', 'PR', 'VI')
- and DestState not in ('AK', 'HI', 'PR', 'VI')
- and FlightDate < '2010-01-01'
+ DayOfWeek NOT IN (6,7) AND OriginState NOT IN ('AK', 'HI', 'PR', 'VI')
+ AND DestState NOT IN ('AK', 'HI', 'PR', 'VI')
+ AND FlightDate < '2010-01-01'
GROUP by Carrier
-HAVING cnt > 100000 and max(Year) > 1990
+HAVING cnt>100000 AND max(Year)>1990
ORDER by rate DESC
LIMIT 1000;
```

@@ -317,15 +358,39 @@ LIMIT 1000;

Бонус:

``` sql
-SELECT avg(cnt) FROM (SELECT Year,Month,count(*) AS cnt FROM ontime WHERE DepDel15=1 GROUP BY Year,Month)
+SELECT avg(cnt)
+FROM
+(
+ SELECT Year,Month,count(*) AS cnt
+ FROM ontime
+ WHERE DepDel15=1
+ GROUP BY Year,Month
+);

-select avg(c1) from (select Year,Month,count(*) as c1 from ontime group by Year,Month)
+SELECT avg(c1) FROM
+(
+ SELECT Year,Month,count(*) AS c1
+ FROM ontime
+ GROUP BY Year,Month
+);

-SELECT DestCityName, uniqExact(OriginCityName) AS u FROM ontime GROUP BY DestCityName ORDER BY u DESC LIMIT 10;
+SELECT DestCityName, uniqExact(OriginCityName) AS u
+FROM ontime
+GROUP BY DestCityName
+ORDER BY u DESC
+LIMIT 10;

-SELECT OriginCityName, DestCityName, count() AS c FROM ontime GROUP BY OriginCityName, DestCityName ORDER BY c DESC LIMIT 10;
+SELECT OriginCityName, DestCityName, count() AS c
+FROM ontime
+GROUP BY OriginCityName, DestCityName
+ORDER BY c DESC
+LIMIT 10;

-SELECT OriginCityName, count() AS c FROM ontime GROUP BY OriginCityName ORDER BY c DESC LIMIT 10;
+SELECT OriginCityName, count() AS c
+FROM ontime
+GROUP BY OriginCityName
+ORDER BY c DESC
+LIMIT 10;
```

Данный тест производительности был создан Вадимом Ткаченко, статьи по теме:
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 071f99d48d0..e320231d397 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -667,4 +667,22 @@ load_balancing = first_or_random

- [insert_quorum](#settings-insert_quorum)
- [insert_quorum_timeout](#settings-insert_quorum_timeout)

+## allow_experimental_cross_to_join_conversion {#settings-allow_experimental_cross_to_join_conversion}
+
+Включает или отключает:
+
+1. Перезапись запросов из синтаксиса с запятыми в синтаксис `JOIN ON/USING`.
Если значение параметра равно 0, ClickHouse не обрабатывает запросы с синтаксисом, использующим запятые, и генерирует исключение. +2. Преобразование `CROSS JOIN` в `INNER JOIN`, если условия объединения таблиц это позволяют. + +Возможные значения: + +- 0 — выключена. +- 1 — включена. + +Значение по умолчанию: 1. + +**Смотрите также** + +- [Множественный JOIN](../../query_language/select.md#select-join) + [Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/table_engines/odbc.md b/docs/ru/operations/table_engines/odbc.md index 31f95a56524..c3abd18aaa3 100644 --- a/docs/ru/operations/table_engines/odbc.md +++ b/docs/ru/operations/table_engines/odbc.md @@ -1,29 +1,122 @@ -# ODBC +# ODBC {#table_engine-odbc} -Allows ClickHouse to connect to external databases via [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). +Позволяет ClickHouse подключаться к внешним базам данных с помощью [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity). -To implement ODBC connection, ClickHouse uses the separate program `clickhouse-odbc-bridge`. ClickHouse starts this program automatically when it is required. The ODBC bridge program is installed by the same package with the ClickHouse server. +Чтобы использование ODBC было безопасным, ClickHouse использует отдельную программу `clickhouse-odbc-bridge`. Если драйвер ODBC подгружать непосредственно из `clickhouse-server`, то проблемы с драйвером могут привести к аварийной остановке сервера ClickHouse. ClickHouse автоматически запускает `clickhouse-odbc-bridge` по мере необходимости. Программа устанавливается из того же пакета, что и `clickhouse-server`. -This engine supports the [Nullable](../../data_types/nullable.md) data type. +Движок поддерживает тип данных [Nullable](../../data_types/nullable.md). -## Creating a Table +## Создание таблицы ``` -CREATE TABLE [IF NOT EXISTS] [db.]table_name ENGINE = ODBC(connection_settings, external_database, external_table) +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1], + name2 [type2], + ... +) +ENGINE = ODBC(connection_settings, external_database, external_table) ``` -**Engine Parameters** +Смотрите подробное описание запроса [CREATE TABLE](../../query_language/create.md#create-table-query). -- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. -- `external_database` — Database in an external DBMS. -- `external_table` — A name of the table in `external_database`. +Структура таблицы может отличаться от структуры исходной таблицы в удалённой СУБД: -## Usage Example +- Имена столбцов должны быть такими же, как в исходной таблице, но вы можете использовать только некоторые из этих столбцов и в любом порядке. +- Типы столбцов могут отличаться от типов аналогичных столбцов в исходной таблице. ClickHouse пытается [приводить](../../query_language/functions/type_conversion_functions.md#type_conversion_function-cast) значения к типам данных ClickHouse. -Some examples of how to use external dictionaries through ODBC you can find in the [ODBC](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc) section of external dictionaries configuration. +**Параметры движка** -## See Also +- `connection_settings` — название секции с настройками соединения в файле `odbc.ini`. +- `external_database` — имя базы данных во внешней СУБД. +- `external_table` — имя таблицы в `external_database`. 
-- [ODBC table function](../../query_language/table_functions/odbc.md).
+## Пример использования

-[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/jdbc/)
+**Извлечение данных из локальной установки MySQL через ODBC**
+
+Этот пример проверялся на Ubuntu Linux 18.04 с MySQL server 5.7.
+
+Убедитесь, что unixODBC и MySQL Connector установлены.
+
+По умолчанию (если установлен из пакетов) ClickHouse запускается от имени пользователя `clickhouse`. Таким образом, вам нужно создать и настроить этого пользователя на сервере MySQL.
+
+```
+sudo mysql
+mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
+mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'localhost' WITH GRANT OPTION;
+```
+
+Теперь настроим соединение в `/etc/odbc.ini`.
+
+```
+$ cat /etc/odbc.ini
+[mysqlconn]
+DRIVER = /usr/local/lib/libmyodbc5w.so
+SERVER = 127.0.0.1
+PORT = 3306
+DATABASE = test
+USERNAME = clickhouse
+PASSWORD = clickhouse
+```
+
+Вы можете проверить соединение с помощью утилиты `isql` из установки unixODBC.
+
+```
+isql -v mysqlconn
++---------------------------------------+
+| Connected! |
+| |
+...
+```
+
+Таблица в MySQL:
+
+```
+mysql> CREATE TABLE `test`.`test` (
+ -> `int_id` INT NOT NULL AUTO_INCREMENT,
+ -> `int_nullable` INT NULL DEFAULT NULL,
+ -> `float` FLOAT NOT NULL,
+ -> `float_nullable` FLOAT NULL DEFAULT NULL,
+ -> PRIMARY KEY (`int_id`));
+Query OK, 0 rows affected (0,09 sec)
+
+mysql> insert into test (`int_id`, `float`) VALUES (1,2);
+Query OK, 1 row affected (0,00 sec)
+
+mysql> select * from test;
++--------+--------------+-------+----------------+
+| int_id | int_nullable | float | float_nullable |
++--------+--------------+-------+----------------+
+| 1 | NULL | 2 | NULL |
++--------+--------------+-------+----------------+
+1 row in set (0,00 sec)
+```
+
+Таблица в ClickHouse, которая получает данные из таблицы MySQL:
+
+```sql
+CREATE TABLE odbc_t
+(
+ `int_id` Int32,
+ `float_nullable` Nullable(Float32)
+)
+ENGINE = ODBC('DSN=mysqlconn', 'test', 'test')
+```
+
+```sql
+SELECT * FROM odbc_t
+```
+
+```text
+┌─int_id─┬─float_nullable─┐
+│ 1 │ ᴺᵁᴸᴸ │
+└────────┴────────────────┘
+```
+
+## Смотрите также
+
+- [Внешние словари ODBC](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
+- [Табличная функция odbc](../../query_language/table_functions/odbc.md)
+
+[Оригинальная статья](https://clickhouse.yandex/docs/ru/operations/table_engines/odbc/)
diff --git a/docs/ru/query_language/agg_functions/reference.md b/docs/ru/query_language/agg_functions/reference.md
index 632c4a47643..faf7936c984 100644
--- a/docs/ru/query_language/agg_functions/reference.md
+++ b/docs/ru/query_language/agg_functions/reference.md
@@ -597,12 +597,12 @@ simpleLinearRegression(x, y)

Параметры:

-- `x` — Столбец со значениями зависимой переменной.
+- `x` — столбец со значениями зависимой переменной.
- `y` — столбец со значениями наблюдаемой переменной.

Возвращаемые значения:

-Параметры `(a, b)` результирующей прямой `x = a*y + b`.
+Константы `(a, b)` результирующей прямой `y = a*x + b`.
**Примеры**
diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md
index 3c2a4cfa154..614c56ee44e 100644
--- a/docs/ru/query_language/select.md
+++ b/docs/ru/query_language/select.md
@@ -202,7 +202,7 @@ VALUES ('Hello', [1,2]), ('World', [3,4,5]), ('Goodbye', []);

В примере ниже используется `ARRAY JOIN`:

-``` sql
+```sql
SELECT s, arr
FROM arrays_test
ARRAY JOIN arr;
@@ -219,7 +219,7 @@ ARRAY JOIN arr;

Следующий пример использует `LEFT ARRAY JOIN`:

-``` sql
+```sql
SELECT s, arr
FROM arrays_test
LEFT ARRAY JOIN arr;
@@ -431,7 +431,7 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num;

Соединяет данные в привычном для [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) смысле.

!!! info "Примечание"
- Не связана с функциональностью с [ARRAY JOIN](#select-array-join-clause).
+ Не связана с функциональностью [ARRAY JOIN](#select-array-join-clause).

```sql
SELECT
@@ -452,7 +452,29 @@ FROM

Смотрите описание стандартного [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)).

-**Строгость ANY или ALL**
+#### Множественный JOIN
+
+При выполнении запросов ClickHouse перезаписывает множественный `JOIN` как комбинацию двухтабличных объединений и обрабатывает их последовательно. Например, если необходимо объединить четыре таблицы, ClickHouse объединяет первую и вторую таблицы, затем соединяет результат с третьей, а затем с четвертой.
+
+Если запрос содержит секцию `WHERE`, ClickHouse пытается пробросить фильтры из этой секции в промежуточный `JOIN`. Если он не может пробросить фильтр в каждый промежуточный `JOIN`, ClickHouse применяет фильтры после того, как все `JOIN` будут выполнены.
+
+Для создания запросов мы рекомендуем использовать синтаксис `JOIN ON` или `JOIN USING`. Например:
+
+```
+SELECT * FROM t1 JOIN t2 ON t1.a = t2.a JOIN t3 ON t1.a = t3.a
+```
+
+В секции `FROM` вы можете использовать разделенные запятыми списки таблиц для объединения. Этот синтаксис работает только при включённой настройке [allow_experimental_cross_to_join_conversion = 1](../operations/settings/settings.md#settings-allow_experimental_cross_to_join_conversion). Например:
+
+```
+SELECT * FROM t1, t2, t3 WHERE t1.a = t2.a AND t1.a = t3.a
+```
+
+Не смешивайте синтаксисы.
+
+ClickHouse не поддерживает синтаксис с запятыми напрямую, поэтому мы не рекомендуем его использовать. Алгоритм пытается переписать запрос с помощью секций `CROSS JOIN` и `INNER JOIN` и затем продолжает его выполнение. При переписывании запроса ClickHouse пытается оптимизировать производительность и потребление памяти. По умолчанию ClickHouse трактует запятые как `INNER JOIN` и конвертирует `INNER JOIN` в `CROSS JOIN`, когда не может гарантировать, что `INNER JOIN` возвращает запрошенные данные.
+
+#### Строгость ANY или ALL

Если указано `ALL`, то при наличии в "правой" таблице нескольких соответствующих строк, данные будут размножены по количеству этих строк. Это нормальное поведение `JOIN` как в стандартном SQL.
Если указано `ANY`, то при наличии в "правой" таблице нескольких соответствующих строк, будет присоединена только первая попавшаяся. Если известно, что в "правой" таблице есть не более одной подходящей строки, то результаты `ANY` и `ALL` совпадают.

@@ -534,6 +556,17 @@ LIMIT 10

Если ключами `JOIN` выступают поля типа [Nullable](../data_types/nullable.md), то строки, где хотя бы один из ключей имеет значение [NULL](syntax.md#null-literal), не соединяются.
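Ниже приведён условный набросок, демонстрирующий это поведение (таблицы `t1` и `t2` придуманы для примера):

```sql
CREATE TABLE t1 (k Nullable(Int32), v String) ENGINE = Memory;
CREATE TABLE t2 (k Nullable(Int32), w String) ENGINE = Memory;

INSERT INTO t1 VALUES (1, 'a'), (NULL, 'b');
INSERT INTO t2 VALUES (1, 'x'), (NULL, 'y');

-- Вернётся только строка с k = 1: строки, где ключ равен NULL, не соединяются.
SELECT * FROM t1 ALL INNER JOIN t2 USING (k);
```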
+#### Ограничения синтаксиса
+
+Для множественных секций `JOIN` в одном запросе `SELECT`:
+
+- Получение всех столбцов через `*` возможно только при объединении таблиц, но не подзапросов.
+- Секция `PREWHERE` недоступна.
+
+Для секций `ON`, `WHERE` и `GROUP BY`:
+
+- Нельзя использовать произвольные выражения в секциях `ON`, `WHERE` и `GROUP BY`, однако можно определить выражение в секции `SELECT` и затем использовать его через алиас в других секциях.
+
### Секция WHERE {#select-where}

Позволяет задать выражение, которое ClickHouse использует для фильтрации данных перед всеми другими действиями в запросе кроме выражений, содержащихся в секции [PREWHERE](#select-prewhere). Обычно, это выражение с логическими операторами.
diff --git a/docs/ru/query_language/table_functions/odbc.md b/docs/ru/query_language/table_functions/odbc.md
index 78095916fb6..9be84e4fc98 100644
--- a/docs/ru/query_language/table_functions/odbc.md
+++ b/docs/ru/query_language/table_functions/odbc.md
@@ -1,27 +1,98 @@
-# odbc
+# odbc {#table_functions-odbc}

-Returns table that is connected via ODBC driver.
+Возвращает таблицу, подключенную через [ODBC](https://en.wikipedia.org/wiki/Open_Database_Connectivity).

```
odbc(connection_settings, external_database, external_table)
```

-**Function Parameters**
+Параметры:

-- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file.
-- `external_database` — Database in an external DBMS.
-- `external_table` — A name of the table in `external_database`.
+- `connection_settings` — название секции с настройками соединения в файле `odbc.ini`.
+- `external_database` — имя базы данных во внешней СУБД.
+- `external_table` — имя таблицы в `external_database`.

-To implement ODBC connection, ClickHouse uses the separate program `clickhouse-odbc-bridge`. ClickHouse starts this program automatically when it is required. The ODBC bridge program is installed by the same package with the ClickHouse server.
+Чтобы использование ODBC было безопасным, ClickHouse использует отдельную программу `clickhouse-odbc-bridge`. Если драйвер ODBC подгружать непосредственно из `clickhouse-server`, то проблемы с драйвером могут привести к аварийной остановке сервера ClickHouse. ClickHouse автоматически запускает `clickhouse-odbc-bridge` по мере необходимости. Программа устанавливается из того же пакета, что и `clickhouse-server`.

-This function supports the [Nullable](../../data_types/nullable.md) data type (based on DDL of remote table that is queried).
+Поля из внешней таблицы со значениями `NULL` получают значение по умолчанию для базового типа данных. Например, если поле в удалённой таблице MySQL имеет тип `INT NULL`, оно сконвертируется в 0 (значение по умолчанию для типа данных ClickHouse `Int32`).

-**Examples**
+## Пример использования
+
+**Получение данных из локальной установки MySQL через ODBC**
+
+Этот пример проверялся на Ubuntu Linux 18.04 с MySQL server 5.7.
+
+Убедитесь, что unixODBC и MySQL Connector установлены.
+
+По умолчанию (если установлен из пакетов) ClickHouse запускается от имени пользователя `clickhouse`. Таким образом, вам нужно создать и настроить этого пользователя на сервере MySQL.
```
-select * from odbc('DSN=connection_settings_name', 'external_database_name', 'external_table_name')
+sudo mysql
+mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
+mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'localhost' WITH GRANT OPTION;
```

-Some examples of how to use external dictionaries through ODBC you can find in the [ODBC](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc) section of external dictionaries configuration.
+Теперь настроим соединение в `/etc/odbc.ini`.

-[Original article](https://clickhouse.yandex/docs/en/query_language/table_functions/jdbc/)
+```
+$ cat /etc/odbc.ini
+[mysqlconn]
+DRIVER = /usr/local/lib/libmyodbc5w.so
+SERVER = 127.0.0.1
+PORT = 3306
+DATABASE = test
+USERNAME = clickhouse
+PASSWORD = clickhouse
+```
+
+Вы можете проверить соединение с помощью утилиты `isql` из установки unixODBC.
+
+```
+isql -v mysqlconn
++---------------------------------------+
+| Connected! |
+| |
+...
+```
+
+Таблица в MySQL:
+
+```
+mysql> CREATE TABLE `test`.`test` (
+ -> `int_id` INT NOT NULL AUTO_INCREMENT,
+ -> `int_nullable` INT NULL DEFAULT NULL,
+ -> `float` FLOAT NOT NULL,
+ -> `float_nullable` FLOAT NULL DEFAULT NULL,
+ -> PRIMARY KEY (`int_id`));
+Query OK, 0 rows affected (0,09 sec)
+
+mysql> insert into test (`int_id`, `float`) VALUES (1,2);
+Query OK, 1 row affected (0,00 sec)
+
+mysql> select * from test;
++--------+--------------+-------+----------------+
+| int_id | int_nullable | float | float_nullable |
++--------+--------------+-------+----------------+
+| 1 | NULL | 2 | NULL |
++--------+--------------+-------+----------------+
+1 row in set (0,00 sec)
+```
+
+Получение данных из таблицы MySQL в ClickHouse:
+
+```sql
+SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test')
+```
+
+```text
+┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
+│ 1 │ 0 │ 2 │ 0 │
+└────────┴──────────────┴───────┴────────────────┘
+```
+
+## Смотрите также
+
+- [Внешние словари ODBC](../../query_language/dicts/external_dicts_dict_sources.md#dicts-external_dicts_dict_sources-odbc)
+- [Движок таблиц ODBC](../../operations/table_engines/odbc.md)
+
+[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/table_functions/odbc/)
diff --git a/website/index.html b/website/index.html
index db45c3b26b2..0f705fcf5e9 100644
--- a/website/index.html
+++ b/website/index.html
@@ -94,7 +94,7 @@
- Upcoming ClickHouse Meetup: Shenzhen on October 20 + Upcoming Meetups: Novosibirsk on June 26 and Shenzhen on October 20